[PATCH v1] Widening-Mul: Fix one ICE when iterate on phi node

2024-09-27 Thread pan2 . li
From: Pan Li 

We iterate over all phi nodes of a bb to try to match the SAT_* pattern
for scalar integers.  We also remove the phi node when the relevant
pattern is matched.

Unfortunately, the iterator is not aware that the phi node has been removed,
so it keeps using the freed data and then hits an ICE similar to the one below.

[0] psi ptr 0x75216340c000
[0] psi ptr 0x75216340c400
[1] psi ptr 0xa5a5a5a5a5a5a5a5 <=== GC freed pointer.

during GIMPLE pass: widening_mul
tmp.c: In function ‘f’:
tmp.c:45:6: internal compiler error: Segmentation fault
   45 | void f(int rows, int cols) {
  |  ^
0x36e2788 internal_error(char const*, ...)
../../gcc/diagnostic-global-context.cc:517
0x18005f0 crash_signal
../../gcc/toplev.cc:321
0x752163c4531f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
0x103ae0e bool is_a_helper<gphi*>::test<gimple>(gimple*)
../../gcc/gimple.h:1256
0x103f9a5 bool is_a<gphi*, gimple>(gimple*)
../../gcc/is-a.h:232
0x103dc78 gphi* as_a<gphi*, gimple>(gimple*)
../../gcc/is-a.h:255
0x104f12e gphi_iterator::phi() const
../../gcc/gimple-iterator.h:47
0x1a57bef after_dom_children
../../gcc/tree-ssa-math-opts.cc:6140
0x3344482 dom_walker::walk(basic_block_def*)
../../gcc/domwalk.cc:354
0x1a58601 execute
../../gcc/tree-ssa-math-opts.cc:6312

This patch fixes the iterate-on-modified-collection problem
by saving the next phi iterator in advance.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

PR middle-end/116861

gcc/ChangeLog:

* tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
Back up the next psi iterator before removing the phi node.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116861-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.dg/torture/pr116861-1.c | 76 +++
 gcc/tree-ssa-math-opts.cc |  9 ++-
 2 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116861-1.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr116861-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116861-1.c
new file mode 100644
index 000..7dcfe664d89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116861-1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void pm_message(void);
+struct CmdlineInfo {
+  _Bool wantCrop[4];
+  unsigned int margin;
+};
+typedef struct {
+  unsigned int removeSize;
+} CropOp;
+typedef struct {
+  CropOp op[4];
+} CropSet;
+static void divideAllBackgroundIntoBorders(unsigned int const totalSz,
+   _Bool const wantCropSideA,
+   _Bool const wantCropSideB,
+   unsigned int const wantMargin,
+   unsigned int *const sideASzP,
+   unsigned int *const sideBSzP) {
+  unsigned int sideASz, sideBSz;
+  if (wantCropSideA && wantCropSideB)
+  {
+sideASz = totalSz / 2;
+if (wantMargin)
+  sideBSz = totalSz - sideASz;
+  }
+  else if (wantCropSideB)
+  {
+sideBSz = 0;
+  }
+  *sideASzP = sideASz;
+  *sideBSzP = sideBSz;
+}
+static CropOp oneSideCrop(_Bool const wantCrop, unsigned int const borderSz,
+  unsigned int const margin) {
+  CropOp retval;
+  if (wantCrop)
+  {
+if (borderSz >= margin)
+  retval.removeSize = borderSz - margin;
+else
+  retval.removeSize = 0;
+  }
+  return retval;
+}
+struct CmdlineInfo cmdline1;
+void f(int rows, int cols) {
+  struct CmdlineInfo cmdline0 = cmdline1;
+  CropSet crop;
+  struct CmdlineInfo cmdline = cmdline0;
+  CropSet retval;
+  unsigned int leftBorderSz, rghtBorderSz;
+  unsigned int topBorderSz, botBorderSz;
+  divideAllBackgroundIntoBorders(cols, cmdline.wantCrop[0],
+ cmdline.wantCrop[1], cmdline.margin > 0,
+ &leftBorderSz, &rghtBorderSz);
+  divideAllBackgroundIntoBorders(rows, cmdline.wantCrop[2],
+ cmdline.wantCrop[3], cmdline.margin > 0,
+ &topBorderSz, &botBorderSz);
+  retval.op[0] =
+  oneSideCrop(cmdline.wantCrop[0], leftBorderSz, cmdline.margin);
+  retval.op[1] =
+  oneSideCrop(cmdline.wantCrop[1], rghtBorderSz, cmdline.margin);
+  retval.op[2] =
+  oneSideCrop(cmdline.wantCrop[2], topBorderSz, cmdline.margin);
+  retval.op[3] =
+  oneSideCrop(cmdline.wantCrop[3], botBorderSz, cmdline.margin);
+  crop = retval;
+  unsigned int i = 0;
+  for (i = 0; i < 4; ++i)
+  {
+if (crop.op[i].removeSize == 0)
+  pm_message();
+  }
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8c622514dbd..f1cfe7dfab0 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6129,10 +6129,15 @@ math_opts_dom_walker::after_dom_children (basic_block 
bb)
 
   fma_d

[PATCH v1 1/2] Match: Support form 2 for scalar signed integer SAT_SUB

2024-09-26 Thread pan2 . li
From: Pan Li 

This patch adds support for form 2 of the scalar signed
integer SAT_SUB, as in the example below:

Form 2:
  #define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_2 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
if ((x ^ y) >= 0 || (minus ^ x) >= 0)  \
  return minus;\
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_sub_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t minus;
   8   │   unsigned char x.0_1;
   9   │   unsigned char y.1_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _15;
  19   │
  20   │ ;;   basic block 2, loop depth 0
  21   │ ;;pred:   ENTRY
  22   │   x.0_1 = (unsigned char) x_7(D);
  23   │   y.1_2 = (unsigned char) y_8(D);
  24   │   _3 = x.0_1 - y.1_2;
  25   │   minus_9 = (int8_t) _3;
  26   │   _4 = x_7(D) ^ y_8(D);
  27   │   _5 = x_7(D) ^ minus_9;
  28   │   _15 = _4 & _5;
  29   │   if (_15 >= 0)
  30   │ goto <bb 4>; [42.57%]
  31   │   else
  32   │ goto <bb 3>; [57.43%]
  33   │ ;;succ:   4
  34   │ ;;3
  35   │
  36   │ ;;   basic block 3, loop depth 0
  37   │ ;;pred:   2
  38   │   _11 = x_7(D) < 0;
  39   │   _12 = (signed char) _11;
  40   │   _13 = -_12;
  41   │   _14 = _13 ^ 127;
  42   │ ;;succ:   4
  43   │
  44   │ ;;   basic block 4, loop depth 0
  45   │ ;;pred:   2
  46   │ ;;3
  47   │   # _6 = PHI <minus_9(2), _14(3)>
  48   │   return _6;
  49   │ ;;succ:   EXIT
  50   │
  51   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_sub_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_SUB (x_7(D), y_8(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add case 2 matching pattern for signed SAT_SUB.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 63f7f3142c4..3baf209350b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3372,6 +3372,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
@2)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
 
+/* Signed saturation sub, case 2:
+   T minus = (T)((UT)X - (UT)Y);
+   SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_sub @0 @1)
+ (cond^ (ge (bit_and:c (bit_xor:c @0 @1)
+  (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+(nop_convert @1)))))
+   integer_zerop)
+   @2
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
 /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)
-- 
2.43.0



[PATCH v1 2/2] RISC-V: Add testcases for form 2 of scalar signed SAT_SUB

2024-09-26 Thread pan2 . li
From: Pan Li 

Form 2:
  #define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_2 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
if ((x ^ y) >= 0 || (minus ^ x) >= 0)  \
  return minus;\
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_sub-2-i16.c: New test.
* gcc.target/riscv/sat_s_sub-2-i32.c: New test.
* gcc.target/riscv/sat_s_sub-2-i64.c: New test.
* gcc.target/riscv/sat_s_sub-2-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 15 ++
 .../gcc.target/riscv/sat_s_sub-2-i16.c| 30 +++
 .../gcc.target/riscv/sat_s_sub-2-i32.c| 28 +
 .../gcc.target/riscv/sat_s_sub-2-i64.c| 27 +
 .../gcc.target/riscv/sat_s_sub-2-i8.c | 28 +
 .../gcc.target/riscv/sat_s_sub-run-2-i16.c| 16 ++
 .../gcc.target/riscv/sat_s_sub-run-2-i32.c| 16 ++
 .../gcc.target/riscv/sat_s_sub-run-2-i64.c| 16 ++
 .../gcc.target/riscv/sat_s_sub-run-2-i8.c | 16 ++
 9 files changed, 192 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 587f3f8348c..66d393399a2 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -367,9 +367,24 @@ sat_s_sub_##T##_fmt_1 (T x, T y) \
 #define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_sub_##T##_fmt_2 (T x, T y) \
+{\
+  T minus = (UT)x - (UT)y;   \
+  if ((x ^ y) >= 0 || (minus ^ x) >= 0)  \
+return minus;\
+  return x < 0 ? MIN : MAX;  \
+}
+#define DEF_SAT_S_SUB_FMT_2_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
 
+#define RUN_SAT_S_SUB_FMT_2(T, x, y) sat_s_sub_##T##_fmt_2(x, y)
+#define RUN_SAT_S_SUB_FMT_2_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_2(T, x, y)
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c
new file mode 100644
index 000..6aac2c71ba4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_sub_int16_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\

[PATCH v1] RISC-V: Cleanup debug code for SAT_* testcases [NFC]

2024-09-25 Thread pan2 . li
From: Pan Li 

Some printf code for debugging was committed by mistake; remove it
from the test header file.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/scalar_sat_binary_run_xxx.h: Remove printf
code for debugging.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h 
b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h
index 7578453e944..a7e0d988626 100644
--- a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h
@@ -1,8 +1,6 @@
 #ifndef HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX
 #define HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX
 
-#include <stdio.h>
-
 int
 main ()
 {
@@ -14,10 +12,7 @@ main ()
   d = DATA[i];
 
   if (RUN_BINARY (d.a, d.b) != d.expect)
-  {
-   printf ("%d + %d = %d, but %d\n", d.a, d.b, d.expect, RUN_BINARY (d.a, 
d.b));
__builtin_abort ();
-  }
 }
 
   return 0;
-- 
2.43.0



[PATCH v1 2/3] RISC-V: Refine the testcase of vector SAT_SUB

2024-09-24 Thread pan2 . li
From: Pan Li 

Use scan-assembler-times to check for the vssub insn instead of checking the
function body, as we only care whether we can generate the fixed-point insn vssub.

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: Ditto.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-1.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-10.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-11.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-12.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-13.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-14.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-15.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-16.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-17.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-18.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-19.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-2.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-20.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-21.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-22.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-23.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-24.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-25.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-26.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-27.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_

[PATCH v1 3/3] RISC-V: Refine the testcase of vector SAT_TRUNC

2024-09-24 Thread pan2 . li
From: Pan Li 

Use scan-assembler-times to check for the vnclip insn instead of checking the
function body, as we only care whether we can generate the fixed-point insn vnclip.

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: Ditto.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/unop/vec_sat_u_trunc-1.c  | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-10.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-11.c | 16 +-
 .../rvv/autovec/unop/vec_sat_u_trunc-12.c | 12 +--
 .../rvv/autovec/unop/vec_sat_u_trunc-13.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-14.c | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-15.c | 21 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-16.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-17.c | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-18.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-19.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-2.c  | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-20.c | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-21.c | 21 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-22.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-23.c | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-24.c | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-3.c  | 21 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-4.c  | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-5.c  | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-6.c  | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-7.c  | 13 ++--
 .../rvv/autovec/unop/vec_sat_u_trunc-8.c  | 17 ++-
 .../rvv/autovec/unop/vec_sat_u_trunc-9.c  | 21 ++-
 24 files changed, 46 insertions(+), 328 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
index 186005733ec..3d29d26abff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
@@ -1,18 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto" } } */
-/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
-/*
-** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1:
-** ...
-** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma
-** ...
-** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
-** ...
-*/
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t)
 
 /* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclipu\.wi} 1 } } */
diff --git 
a/gcc/te

[PATCH v1 1/3] RISC-V: Refine the testcase of vector SAT_ADD

2024-09-24 Thread pan2 . li
From: Pan Li 

Use scan-assembler-times to check for the vsadd insn instead of checking the
function body, as we only care whether we can generate the fixed-point insn vsadd.

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: Ditto.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-1.c   | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-2.c   | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-3.c   | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-4.c   | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c   | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-10.c  |  5 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-11.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-12.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-13.c  | 12 +---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-14.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-15.c

[PATCH v1 3/3] RISC-V: Add testcases for form 1 of scalar signed SAT_SUB

2024-09-24 Thread pan2 . li
From: Pan Li 

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_1 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
return (x ^ y) >= 0\
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data for SAT_SUB.
* gcc.target/riscv/sat_s_sub-1-i16.c: New test.
* gcc.target/riscv/sat_s_sub-1-i32.c: New test.
* gcc.target/riscv/sat_s_sub-1-i64.c: New test.
* gcc.target/riscv/sat_s_sub-1-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 17 +
 .../gcc.target/riscv/sat_arith_data.h | 73 +++
 .../gcc.target/riscv/sat_s_sub-1-i16.c| 30 
 .../gcc.target/riscv/sat_s_sub-1-i32.c| 28 +++
 .../gcc.target/riscv/sat_s_sub-1-i64.c| 27 +++
 .../gcc.target/riscv/sat_s_sub-1-i8.c | 28 +++
 .../gcc.target/riscv/sat_s_sub-run-1-i16.c| 16 
 .../gcc.target/riscv/sat_s_sub-run-1-i32.c| 16 
 .../gcc.target/riscv/sat_s_sub-run-1-i64.c| 16 
 .../gcc.target/riscv/sat_s_sub-run-1-i8.c | 16 
 10 files changed, 267 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a2617b6db70..587f3f8348c 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -353,6 +353,23 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\
   return x > IMM ? x - IMM : 0;   \
 }
 
+#define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_sub_##T##_fmt_1 (T x, T y) \
+{\
+  T minus = (UT)x - (UT)y;   \
+  return (x ^ y) >= 0\
+? minus  \
+: (minus ^ x) >= 0   \
+  ? minus\
+  : x < 0 ? MIN : MAX;   \
+}
+#define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)
+
+#define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
+#define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index 75037c5d806..39a1e17cd3d 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -37,6 +37,11 @@ TEST_BINARY_STRUCT (int16_t, ssadd)
 TEST_BINARY_STRUCT (int32_t, ssadd)
 TEST_BINARY_STRUCT (int64_t, ssadd)
 
+TEST_BINARY_STRUCT (int8_t,  sssub)
+TEST_BINARY_STRUCT (int16_t, sssub)
+TEST_BINARY_STRUCT (int32_t, sssub)
+TEST_BINARY_STRUCT (int64_t, sssub)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -189,4 +194,72 @@ TEST_BINARY_STRUCT_DECL(int64_t, ssadd) 
TEST_BINARY_DATA(int64_t, ssadd)[] =
   { -9223372036854775803ll,   9223372036854775805ll,   2},
 };
 
+TEST_BINARY_STRUCT_DECL(int8_t, sssub) TEST_BINARY_DATA(int8_t, sssub)[] =
+{
+  {   0,0,0},
+  {   2,4,   -2},
+  { 126,   -1,  127},
+  { 127,   -1,  127},
+  { 127, -127,  127},
+  {  -7,   -4,   -3},
+  

[PATCH v1 2/3] RISC-V: Implement scalar SAT_SUB for signed integer

2024-09-24 Thread pan2 . li
From: Pan Li 

This patch would like to implement the sssub form 1.  Aka:

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_1 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
return (x ^ y) >= 0\
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │ subwa5,a0,a1
  12   │ slliw   a5,a5,24
  13   │ sraiw   a5,a5,24
  14   │ xor a1,a0,a1
  15   │ xor a4,a0,a5
  16   │ and a1,a1,a4
  17   │ blt a1,zero,.L4
  18   │ mv  a0,a5
  19   │ ret
  20   │ .L4:
  21   │ sraia0,a0,63
  22   │ xoria5,a0,127
  23   │ mv  a0,a5
  24   │ ret

After this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │ sub a4,a0,a1
  12   │ xor a5,a0,a4
  13   │ xor a1,a0,a1
  14   │ and a5,a5,a1
  15   │ srlia5,a5,7
  16   │ andia5,a5,1
  17   │ sraia0,a0,63
  18   │ xoria3,a0,127
  19   │ neg a0,a5
  20   │ addia5,a5,-1
  21   │ and a3,a3,a0
  22   │ and a0,a4,a5
  23   │ or  a0,a0,a3
  24   │ slliw   a0,a0,24
  25   │ sraiw   a0,a0,24
  26   │ ret

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_sssub): Add new func
decl for expanding signed SAT_SUB.
* config/riscv/riscv.cc (riscv_expand_sssub): Add new func impl
for expanding signed SAT_SUB.
* config/riscv/riscv.md (sssub3): Add new pattern sssub
for scalar signed integer.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc   | 69 +
 gcc/config/riscv/riscv.md   | 11 ++
 3 files changed, 81 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 07a4d42e3a5..3d8775e582d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -136,6 +136,7 @@ extern void riscv_legitimize_poly_move (machine_mode, rtx, 
rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7be3939a7f9..8708a7b42c6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12329,6 +12329,75 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implements the signed saturation sub standard name sssub for int mode.
+
+   z = SAT_SUB(x, y).
+   =>
+   1.  minus = x - y
+   2.  xor_0 = x ^ y
+   3.  xor_1 = x ^ minus
+   4.  lt_0 = xor_1 < 0
+   5.  lt_1 = xor_0 < 0
+   6.  and = lt_0 & lt_1
+   7.  lt = x < 0
+   8.  neg = -lt
+   9.  max = INT_MAX
+   10. max = max ^ neg
+   11. neg = -and
+   12. max = max & neg
+   13. and = and - 1
+   14. z = minus & and
+   15. z = z | max  */
+
+void
+riscv_expand_sssub (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+  rtx shift_bits = GEN_INT (bitsize - 1);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_minus = gen_reg_rtx (Xmode);
+  rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+  rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_0 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_1 = gen_reg_rtx (Xmode);
+  rtx xmode_and = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_neg = gen_reg_rtx (Xmode);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+
+  /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus.  */
+  riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus);
+
+  /* Step-2: and = xor_0 < 0 & xor_1 < 0.  */
+  riscv_emit_binary (LSHIFTRT, xmode_lt_0, xmode_xor_0, shift_bits);
+  riscv_emit_binary (LSHIFTRT, xmode_lt_1, xmode_xor_1, shift_bits);
+  riscv_emit_binary (AND, xmode_and, xmode_lt_0, xmode_lt_1);
+  riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
+
+  /* Step-3: lt = x < 0, neg = -lt.  */
+  riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, xmode_neg, xmode_lt);
+
+  /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg.  */
+  riscv_emit_move (xmode_max

[PATCH v1 1/3] Match: Support form 1 for scalar signed integer SAT_SUB

2024-09-24 Thread pan2 . li
From: Pan Li 

This patch adds support for form 1 of the scalar signed integer
SAT_SUB, as shown in the example below:

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_1 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
return (x ^ y) >= 0\
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_sub_int8_t_fmt_1 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t minus;
   8   │   unsigned char x.0_1;
   9   │   unsigned char y.1_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _15;
  19   │
  20   │ ;;   basic block 2, loop depth 0
  21   │ ;;pred:   ENTRY
  22   │   x.0_1 = (unsigned char) x_7(D);
  23   │   y.1_2 = (unsigned char) y_8(D);
  24   │   _3 = x.0_1 - y.1_2;
  25   │   minus_9 = (int8_t) _3;
  26   │   _4 = x_7(D) ^ y_8(D);
  27   │   _5 = x_7(D) ^ minus_9;
  28   │   _15 = _4 & _5;
  29   │   if (_15 < 0)
  30   │     goto <bb 3>; [41.00%]
  31   │   else
  32   │     goto <bb 4>; [59.00%]
  33   │ ;;succ:   3
  34   │ ;;4
  35   │
  36   │ ;;   basic block 3, loop depth 0
  37   │ ;;pred:   2
  38   │   _11 = x_7(D) < 0;
  39   │   _12 = (signed char) _11;
  40   │   _13 = -_12;
  41   │   _14 = _13 ^ 127;
  42   │ ;;succ:   4
  43   │
  44   │ ;;   basic block 4, loop depth 0
  45   │ ;;pred:   2
  46   │ ;;3
  47   │   # _6 = PHI <minus_9(2), _14(3)>
  48   │   return _6;
  49   │ ;;succ:   EXIT
  50   │
  51   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_sub_int8_t_fmt_1 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_SUB (x_7(D), y_8(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add case 1 matching pattern for signed SAT_SUB.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_sub): Add new
decl for generated SAT_SUB matching func.
(match_unsigned_saturation_sub): Rename from...
(match_saturation_sub): ...Rename to and add signed SAT_SUB matching.
(math_opts_dom_walker::after_dom_children): Leverage the named
match func for both the unsigned and signed SAT_SUB.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 14 ++
 gcc/tree-ssa-math-opts.cc |  8 +---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 940292d0d49..63f7f3142c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3358,6 +3358,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (wi::eq_p (sum, wi::uhwi (0, precision)))
 
+/* Signed saturation sub, case 1:
+   T minus = (T)((UT)X - (UT)Y);
+   SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_sub @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 @1)
+  (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+(nop_convert @1)
+   integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
+
 /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index d61668aacfc..f04b17101db 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4024,6 +4024,7 @@ extern bool gimple_unsigned_integer_sat_sub (tree, tree*, 
tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
 extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
 
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
@@ -4162,7 +4163,7 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, 
gassign *stmt)
  *   [local count: 1073741824]:
  *  _1 = .SAT_SUB (x_2(D), y_3(D));  */
 static void
-match_unsigned_sat

[PATCH v3] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking

2024-09-24 Thread pan2 . li
From: Pan Li 

This patch would like to fix the following ICE for -O2 -m32 of x86_64.

during RTL pass: expand
JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned
int)':
JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in
expand_fn_using_insn, at internal-fn.cc:263
3 | void DequeueEvent(unsigned frame) {
  |  ^~~~
0x27b580d diagnostic_context::diagnostic_impl(rich_location*,
diagnostic_metadata const*, diagnostic_option_id, char const*,
__va_list_tag (*) [1], diagnostic_t)
???:0
0x27c4a3f internal_error(char const*, ...)
???:0
0x27b3994 fancy_abort(char const*, int, char const*)
???:0
0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int)
???:0
0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int)
???:0
0xf2c87c expand_SAT_SUB(internal_fn, gcall*)
???:0

We allowed the operand convert when matching SAT_SUB in match.pd, to support
the zip benchmark SAT_SUB pattern.  Aka,

(convert? (minus (convert1? @0) (convert1? @1))) for below sample code.

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB
  } while (--n);
}

The pattern match for SAT_SUB itself may also act on below scalar sample
code too.

unsigned long long GetTimeFromFrames(int);
unsigned long long GetMicroSeconds();

void DequeueEvent(unsigned frame) {
  long long frame_time = GetTimeFromFrames(frame);
  unsigned long long current_time = GetMicroSeconds();
  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
}

Aka:

uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t);

This causes a problem when compiling for ia32 or with -m32, because we
only check that the lhs type (aka uint32_t) is supported by the ifn
instead of the operand type (aka uint64_t).  DImode is mostly disabled for
32-bit targets like ia32 or rv32gcv, which then triggers an ICE when expanding.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

PR middle-end/116814

gcc/ChangeLog:

* tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Make
ifn is_supported type check based on operand instead of lhs.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr116814-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 
 gcc/tree-ssa-math-opts.cc |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C

diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C 
b/gcc/testsuite/g++.dg/torture/pr116814-1.C
new file mode 100644
index 000..dd6f29daa7c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+unsigned long long GetTimeFromFrames(int);
+unsigned long long GetMicroSeconds();
+
+void DequeueEvent(unsigned frame) {
+  long long frame_time = GetTimeFromFrames(frame);
+  unsigned long long current_time = GetMicroSeconds();
+
+  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index d61668aacfc..8c622514dbd 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4042,7 +4042,7 @@ build_saturation_binary_arith_call (gimple_stmt_iterator 
*gsi, gphi *phi,
internal_fn fn, tree lhs, tree op_0,
tree op_1)
 {
-  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
+  if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
 {
   gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
   gimple_call_set_lhs (call, lhs);
-- 
2.43.0



[PATCH v2] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking

2024-09-24 Thread pan2 . li
From: Pan Li 

This patch would like to fix the following ICE for -O2 -m32 of x86_64.

during RTL pass: expand
JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned
int)':
JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in
expand_fn_using_insn, at internal-fn.cc:263
3 | void DequeueEvent(unsigned frame) {
  |  ^~~~
0x27b580d diagnostic_context::diagnostic_impl(rich_location*,
diagnostic_metadata const*, diagnostic_option_id, char const*,
__va_list_tag (*) [1], diagnostic_t)
???:0
0x27c4a3f internal_error(char const*, ...)
???:0
0x27b3994 fancy_abort(char const*, int, char const*)
???:0
0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int)
???:0
0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int)
???:0
0xf2c87c expand_SAT_SUB(internal_fn, gcall*)
???:0

We allowed the operand convert when matching SAT_SUB in match.pd, to support
the zip benchmark SAT_SUB pattern.  Aka,

(convert? (minus (convert1? @0) (convert1? @1))) for below sample code.

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB
  } while (--n);
}

The pattern match for SAT_SUB itself may also act on below scalar sample
code too.

unsigned long long GetTimeFromFrames(int);
unsigned long long GetMicroSeconds();

void DequeueEvent(unsigned frame) {
  long long frame_time = GetTimeFromFrames(frame);
  unsigned long long current_time = GetMicroSeconds();
  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
}

Aka:

uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t);

This causes a problem when compiling for ia32 or with -m32, because we
only check that the lhs type (aka uint32_t) is supported by the ifn
and miss the operand type (aka uint64_t).  DImode is mostly disabled for
32-bit targets like ia32 or rv32gcv, which then triggers an ICE when expanding.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

PR middle-end/116814

gcc/ChangeLog:

* tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add
ifn is_supported check for operand TREE type.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr116814-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 
 gcc/tree-ssa-math-opts.cc | 23 +++
 2 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C

diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C 
b/gcc/testsuite/g++.dg/torture/pr116814-1.C
new file mode 100644
index 000..dd6f29daa7c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+unsigned long long GetTimeFromFrames(int);
+unsigned long long GetMicroSeconds();
+
+void DequeueEvent(unsigned frame) {
+  long long frame_time = GetTimeFromFrames(frame);
+  unsigned long long current_time = GetMicroSeconds();
+
+  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index d61668aacfc..361761cedef 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4042,15 +4042,22 @@ build_saturation_binary_arith_call 
(gimple_stmt_iterator *gsi, gphi *phi,
internal_fn fn, tree lhs, tree op_0,
tree op_1)
 {
-  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
-{
-  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
-  gimple_call_set_lhs (call, lhs);
-  gsi_insert_before (gsi, call, GSI_SAME_STMT);
+  tree lhs_type = TREE_TYPE (lhs);
+  tree op_type = TREE_TYPE (op_0);
 
-  gimple_stmt_iterator psi = gsi_for_stmt (phi);
-  remove_phi_node (&psi, /* release_lhs_p */ false);
-}
+  if (!direct_internal_fn_supported_p (fn, lhs_type, OPTIMIZE_FOR_BOTH))
+return;
+
+  if (lhs_type != op_type
+  && !direct_internal_fn_supported_p (fn, op_type, OPTIMIZE_FOR_BOTH))
+return;
+
+  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+  gimple_call_set_lhs (call, lhs);
+  gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+  gimple_stmt_iterator psi = gsi_for_stmt (phi);
+  remove_phi_node (&psi, /* release_lhs_p */ false);
 }
 
 /*
-- 
2.43.0



[PATCH v1] RISC-V: Fix incorrect test macro for signed scalar SAT_ADD form 2 run test

2024-09-23 Thread pan2 . li
From: Pan Li 

This patch fixes an incorrect test macro usage in the run tests for
form 2 of the signed scalar SAT_ADD.  They should use the
_FMT_2 macros instead of _FMT_1 for form 2.

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_s_add-run-5.c: Take form 2 for run test.
* gcc.target/riscv/sat_s_add-run-6.c: Ditto.
* gcc.target/riscv/sat_s_add-run-7.c: Ditto.
* gcc.target/riscv/sat_s_add-run-8.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h   | 2 ++
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c | 4 ++--
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a2617b6db70..77b5ef1807b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -141,6 +141,8 @@ sat_s_add_##T##_fmt_2 (T x, T y) \
 return sum;  \
   return x < 0 ? MIN : MAX;  \
 }
+#define DEF_SAT_S_ADD_FMT_2_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX)
 
 #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
 T __attribute__((noinline))\
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c
index 9a4ce338d0c..d57e0a0d195 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c
@@ -7,10 +7,10 @@
 #define T1 int8_t
 #define T2 uint8_t
 
-DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT8_MIN, INT8_MAX)
+DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT8_MIN, INT8_MAX)
 
 #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
 #define TTEST_BINARY_STRUCT_DECL(T1, ssadd)
-#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y)
 
 #include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c
index 34459b85e2b..cdac5bdb883 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c
@@ -7,10 +7,10 @@
 #define T1 int16_t
 #define T2 uint16_t
 
-DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT16_MIN, INT16_MAX)
+DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT16_MIN, INT16_MAX)
 
 #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
 #define TTEST_BINARY_STRUCT_DECL(T1, ssadd)
-#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y)
 
 #include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c
index 4d4841f4066..4ac952e27fa 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c
@@ -7,10 +7,10 @@
 #define T1 int32_t
 #define T2 uint32_t
 
-DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT32_MIN, INT32_MAX)
+DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT32_MIN, INT32_MAX)
 
 #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
 #define TTEST_BINARY_STRUCT_DECL(T1, ssadd)
-#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y)
 
 #include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c
index df818879628..4d25e7f171d 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c
@@ -7,10 +7,10 @@
 #define T1 int64_t
 #define T2 uint64_t
 
-DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT64_MIN, INT64_MAX)
+DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT64_MIN, INT64_MAX)
 
 #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
 #define TTEST_BINARY_STRUCT_DECL(T1, ssadd)
-#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y)
 
 #include "scalar_sat_binary_run_xxx.h"
-- 
2.43.0



[PATCH v1] Widening-Mul: Fix one ICE for SAT_SUB matching operand promotion

2024-09-23 Thread pan2 . li
From: Pan Li 

This patch would like to fix the following ICE for -O2 -m32 of x86_64.

during RTL pass: expand
JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned
int)':
JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in
expand_fn_using_insn, at internal-fn.cc:263
3 | void DequeueEvent(unsigned frame) {
  |  ^~~~
0x27b580d diagnostic_context::diagnostic_impl(rich_location*,
diagnostic_metadata const*, diagnostic_option_id, char const*,
__va_list_tag (*) [1], diagnostic_t)
???:0
0x27c4a3f internal_error(char const*, ...)
???:0
0x27b3994 fancy_abort(char const*, int, char const*)
???:0
0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int)
???:0
0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int)
???:0
0xf2c87c expand_SAT_SUB(internal_fn, gcall*)
???:0

We allowed the operand convert when matching SAT_SUB in match.pd, to support
the zip benchmark SAT_SUB pattern.  Aka,

(convert? (minus (convert1? @0) (convert1? @1))) for below sample code.

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB
  } while (--n);
}

The pattern match for SAT_SUB itself may also act on below scalar sample
code too.

unsigned long long GetTimeFromFrames(int);
unsigned long long GetMicroSeconds();

void DequeueEvent(unsigned frame) {
  long long frame_time = GetTimeFromFrames(frame);
  unsigned long long current_time = GetMicroSeconds();
  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
}

Aka:

uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t);

This causes a problem when compiling for ia32 or with -m32, because we
only check that the lhs type (aka uint32_t) is supported by the ifn
and miss the operand type (aka uint64_t).  DImode is mostly disabled for
32-bit targets like ia32 or rv32gcv, which then triggers an ICE when expanding.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

PR target/116814

gcc/ChangeLog:

* tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add
ifn is_supported check for operand TREE type.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr116814-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 
 gcc/tree-ssa-math-opts.cc | 23 +++
 2 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C

diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C 
b/gcc/testsuite/g++.dg/torture/pr116814-1.C
new file mode 100644
index 000..8db5b020cfd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -m32" } */
+
+unsigned long long GetTimeFromFrames(int);
+unsigned long long GetMicroSeconds();
+
+void DequeueEvent(unsigned frame) {
+  long long frame_time = GetTimeFromFrames(frame);
+  unsigned long long current_time = GetMicroSeconds();
+
+  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index d61668aacfc..361761cedef 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4042,15 +4042,22 @@ build_saturation_binary_arith_call 
(gimple_stmt_iterator *gsi, gphi *phi,
internal_fn fn, tree lhs, tree op_0,
tree op_1)
 {
-  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
-{
-  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
-  gimple_call_set_lhs (call, lhs);
-  gsi_insert_before (gsi, call, GSI_SAME_STMT);
+  tree lhs_type = TREE_TYPE (lhs);
+  tree op_type = TREE_TYPE (op_0);
 
-  gimple_stmt_iterator psi = gsi_for_stmt (phi);
-  remove_phi_node (&psi, /* release_lhs_p */ false);
-}
+  if (!direct_internal_fn_supported_p (fn, lhs_type, OPTIMIZE_FOR_BOTH))
+return;
+
+  if (lhs_type != op_type
+  && !direct_internal_fn_supported_p (fn, op_type, OPTIMIZE_FOR_BOTH))
+return;
+
+  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+  gimple_call_set_lhs (call, lhs);
+  gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+  gimple_stmt_iterator psi = gsi_for_stmt (phi);
+  remove_phi_node (&psi, /* release_lhs_p */ false);
 }
 
 /*
-- 
2.43.0



[PATCH v1] RISC-V: Add testcases for form 4 of signed vector SAT_ADD

2024-09-22 Thread pan2 . li
From: Pan Li 

Form 4:
  #define DEF_VEC_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum; \
bool overflow = __builtin_add_overflow (x, y, &sum);   \
out[i] = !overflow ? sum : x < 0 ? MIN : MAX;  \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_4 (int8_t, uint8_t, INT8_MIN, INT8_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and fairly obvious; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_s_add-13.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-14.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-15.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-16.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-run-13.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-14.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-15.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-16.c  | 17 ++
 .../riscv/rvv/autovec/vec_sat_arith.h | 22 +++
 9 files changed, 126 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c
new file mode 100644
index 000..ec3f8aee434
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_4(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c
new file mode 100644
index 000..5542616c90a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_4(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c
new file mode 100644
index 000..091bfd15edf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march

[PATCH v1] Genmatch: Fix ICE for binary phi cfg mismatching [PR116795]

2024-09-22 Thread pan2 . li
From: Pan Li 

This patch fixes an ICE when trying to match the binary phi for the
cfg below.  We checked that the first edge of the Phi block comes
from b0, instead of checking that the only incoming edge of b1 comes
from b0.  Thus, some code was recognized as .SAT_SUB when it is not,
which finally resulted in the verify_ssa failure.

+------+
| b0:  |
| def  |       +-----+
| ...  |       | b1: |
| cond |------>| def |
+------+       | ... |
   |           +-----+
   |              |
   |              |
   v              |
+-----+           |
| b2: |           |
| Phi |<----------+
+-----+

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

PR target/116795

gcc/ChangeLog:

* gimple-match-head.cc (match_cond_with_binary_phi): Fix the
incorrect cfg check as b0->b1 in above example.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116795-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/gimple-match-head.cc  |  2 +-
 gcc/testsuite/gcc.dg/torture/pr116795-1.c | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116795-1.c

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index b63b66e9485..b5d4a71ddc5 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -402,7 +402,7 @@ match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree 
*false_arg)
   if (EDGE_COUNT (pred_b0->succs) == 2
   && EDGE_COUNT (pred_b1->succs) == 1
   && EDGE_COUNT (pred_b1->preds) == 1
-  && pred_b0 == EDGE_PRED (gimple_bb (phi), 0)->src)
+  && pred_b0 == EDGE_PRED (pred_b1, 0)->src)
 /*
  * +--+
  * | b0:  |
diff --git a/gcc/testsuite/gcc.dg/torture/pr116795-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116795-1.c
new file mode 100644
index 000..629bdf4bacd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116795-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+volatile int a, b;
+int c;
+int main() {
+  unsigned e = 0;
+  for (; e < 2; e++) {
+a && b;
+if (c)
+  e = -(c ^ e);
+  }
+  return 0;
+}
-- 
2.43.0



[PATCH v1 2/2] RISC-V: Add testcases for form 3 of signed vector SAT_ADD

2024-09-21 Thread pan2 . li
From: Pan Li 

Form 3:
  #define DEF_VEC_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum; \
bool overflow = __builtin_add_overflow (x, y, &sum);   \
out[i] = overflow ? x < 0 ? MIN : MAX : sum;   \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_3 (int8_t, uint8_t, INT8_MIN, INT8_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-9.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_s_add-10.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-11.c  |  9 
 .../rvv/autovec/binop/vec_sat_s_add-12.c  |  9 
 .../riscv/rvv/autovec/binop/vec_sat_s_add-9.c |  9 
 .../rvv/autovec/binop/vec_sat_s_add-run-10.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-11.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-12.c  | 17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-9.c   | 17 ++
 .../riscv/rvv/autovec/vec_sat_arith.h | 22 +++
 9 files changed, 126 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-9.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c
new file mode 100644
index 000..5dfecdb1732
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_3(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c
new file mode 100644
index 000..ebf825e0dd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_3(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c
new file mode 100644
index 000..82b29a089f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march

[PATCH v1 1/2] Match: Support form 3 for vector signed integer .SAT_ADD

2024-09-21 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the vector signed
integer .SAT_ADD.  Aka below example:

Form 3:
  #define DEF_VEC_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum; \
bool overflow = __builtin_add_overflow (x, y, &sum);   \
out[i] = overflow ? x < 0 ? MIN : MAX : sum;   \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  40   │   # ivtmp.7_34 = PHI <0(3), ivtmp.7_30(7)>
  41   │   _26 = op_1_12(D) + ivtmp.7_34;
  42   │   x_29 = MEM[(int8_t *)_26];
  43   │   _1 = op_2_14(D) + ivtmp.7_34;
  44   │   y_24 = MEM[(int8_t *)_1];
  45   │   _9 = .ADD_OVERFLOW (y_24, x_29);
  46   │   _7 = IMAGPART_EXPR <_9>;
  47   │   if (_7 != 0)
  48   │ goto ; [50.00%]
  49   │   else
  50   │ goto ; [50.00%]
  51   │ ;;succ:   6
  52   │ ;;5
  53   │
  54   │ ;;   basic block 5, loop depth 1
  55   │ ;;pred:   4
  56   │   _42 = REALPART_EXPR <_9>;
  57   │   _2 = out_17(D) + ivtmp.7_34;
  58   │   MEM[(int8_t *)_2] = _42;
  59   │   ivtmp.7_27 = ivtmp.7_34 + 1;
  60   │   if (_13 != ivtmp.7_27)
  61   │ goto ; [89.00%]
  62   │   else
  63   │ goto ; [11.00%]
  64   │ ;;succ:   7
  65   │ ;;8
  66   │
  67   │ ;;   basic block 6, loop depth 1
  68   │ ;;pred:   4
  69   │   _38 = x_29 < 0;
  70   │   _39 = (signed char) _38;
  71   │   _40 = -_39;
  72   │   _41 = _40 ^ 127;
  73   │   _33 = out_17(D) + ivtmp.7_34;
  74   │   MEM[(int8_t *)_33] = _41;
  75   │   ivtmp.7_25 = ivtmp.7_34 + 1;
  76   │   if (_13 != ivtmp.7_25)

After this patch:
  77   │   _94 = .SELECT_VL (ivtmp_92, POLY_INT_CST [16, 16]);
  78   │   vect_x_13.9_81 = .MASK_LEN_LOAD (vectp_op_1.7_79, 8B, { -1, ... }, 
_94, 0);
  79   │   vect_y_15.12_85 = .MASK_LEN_LOAD (vectp_op_2.10_83, 8B, { -1, ... }, 
_94, 0);
  80   │   vect_patt_49.13_86 = .SAT_ADD (vect_x_13.9_81, vect_y_15.12_85);
  81   │   .MASK_LEN_STORE (vectp_out.14_88, 8B, { -1, ... }, _94, 0, 
vect_patt_49.13_86);
  82   │   vectp_op_1.7_80 = vectp_op_1.7_79 + _94;
  83   │   vectp_op_2.10_84 = vectp_op_2.10_83 + _94;
  84   │   vectp_out.14_89 = vectp_out.14_88 + _94;
  85   │   ivtmp_93 = ivtmp_92 - _94;

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add optional nop_convert for signed SAT_ADD case 4.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 940292d0d49..c271a8e4c9d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3246,7 +3246,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
 (match (signed_integer_sat_add @0 @1)
  (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
-   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (bit_xor:c (nop_convert?
+   (negate (nop_convert? (convert (lt @0 integer_zerop)
+  max_value)
(realpart @2))
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
-- 
2.43.0



[PATCH v1 1/2] Match: Support form 2 for vector signed integer .SAT_ADD

2024-09-20 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 2 of the vector signed
integer .SAT_ADD.  Aka below example:

Form 2:
  #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum = (UT)x + (UT)y; \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
  out[i] = sum;\
else   \
  out[i] = x < 0 ? MIN : MAX;  \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
 104   │   loop_len_79 = MIN_EXPR ;
 105   │   _50 = &MEM  [(int8_t *)vectp_op_1.9_77];
 106   │   vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 
0);
 107   │   _70 = vect_x_18.11_80 >> 7;
 108   │   vect_x.12_81 = VIEW_CONVERT_EXPR(vect_x_18.11_80);
 109   │   _26 = (void *) ivtmp.47_20;
 110   │   _27 = &MEM  [(int8_t *)_26];
 111   │   vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 
0);
 112   │   vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
 113   │   mask__50.23_92 = vect__7.21_90 >= { 0, ... };
 114   │   vect_y.16_85 = VIEW_CONVERT_EXPR(vect_y_20.15_84);
 115   │   vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
 116   │   vect_sum_21.18_87 = VIEW_CONVERT_EXPR(vect__6.17_86);
 117   │   vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
 118   │   mask__45.20_89 = vect__8.19_88 < { 0, ... };
 119   │   mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
 120   │   _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, 
vect_sum_21.18_87);
 121   │   _60 = (void *) ivtmp.49_6;
 122   │   _61 = &MEM  [(int8_t *)_60];
 123   │   .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
 124   │   vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
 125   │   ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
 126   │   ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
 127   │   ivtmp.51_98 = ivtmp.51_53;
 128   │   ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 
18446744073709551600];

After this patch:
  88   │   _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
  89   │   vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, 
_103, 0);
  90   │   vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, 
_103, 0);
  91   │   vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
  92   │   .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, 
vect_patt_49.15_95);
  93   │   vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
  94   │   vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
  95   │   vectp_out.16_98 = vectp_out.16_97 + _103;
  96   │   ivtmp_102 = ivtmp_101 - _103;

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the case 5 for signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index fdb59ff0d44..940292d0d49 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 5:
+   T sum = (T)((UT)X + (UT)Y);
+   SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+(nop_convert @1
+  integer_zerop)
+  (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
+   (bit_xor:c (nop_convert (negate (nop_convert (convert
+ (lt @0 integer_zerop)
+  max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v1 2/2] RISC-V: Add testcases for form 2 of signed vector SAT_ADD

2024-09-20 Thread pan2 . li
From: Pan Li 

Form 2:
  #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum = (UT)x + (UT)y; \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
  out[i] = sum;\
else   \
  out[i] = x < 0 ? MIN : MAX;  \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_2 (int8_t, uint8_t, INT8_MIN, INT8_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-8.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-5.c |  9 +++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-6.c |  9 +++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-7.c |  9 +++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-8.c |  9 +++
 .../rvv/autovec/binop/vec_sat_s_add-run-5.c   | 17 +
 .../rvv/autovec/binop/vec_sat_s_add-run-6.c   | 17 +
 .../rvv/autovec/binop/vec_sat_s_add-run-7.c   | 17 +
 .../rvv/autovec/binop/vec_sat_s_add-run-8.c   | 17 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 24 +++
 9 files changed, 128 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-8.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c
new file mode 100644
index 000..8cf0d06efdb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c
new file mode 100644
index 000..a26d3943e27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_2(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c
new file mode 100644
index 000..4ef1351dd29
--- /dev/null
+++ b/gcc/testsuite/gcc.targe

[PATCH v1 2/2] RISC-V: Add testcases for form 4 of signed scalar SAT_ADD

2024-09-19 Thread pan2 . li
From: Pan Li 

Form 4:
  #define DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_4 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return !overflow ? sum : x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_ADD_FMT_4 (int64_t, uint64_t, INT64_MIN, INT64_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_add-13.c: New test.
* gcc.target/riscv/sat_s_add-14.c: New test.
* gcc.target/riscv/sat_s_add-15.c: New test.
* gcc.target/riscv/sat_s_add-16.c: New test.
* gcc.target/riscv/sat_s_add-run-13.c: New test.
* gcc.target/riscv/sat_s_add-run-14.c: New test.
* gcc.target/riscv/sat_s_add-run-15.c: New test.
* gcc.target/riscv/sat_s_add-run-16.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 
 gcc/testsuite/gcc.target/riscv/sat_s_add-13.c | 30 +
 gcc/testsuite/gcc.target/riscv/sat_s_add-14.c | 32 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-15.c | 31 ++
 gcc/testsuite/gcc.target/riscv/sat_s_add-16.c | 29 +
 .../gcc.target/riscv/sat_s_add-run-13.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-14.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-15.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-16.c   | 16 ++
 9 files changed, 200 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-16.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index ab141bb1779..a2617b6db70 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -153,6 +153,17 @@ sat_s_add_##T##_fmt_3 (T x, T y)   \
 #define DEF_SAT_S_ADD_FMT_3_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX)   \
+T __attribute__((noinline))\
+sat_s_add_##T##_fmt_4 (T x, T y)   \
+{  \
+  T sum;   \
+  bool overflow = __builtin_add_overflow (x, y, &sum); \
+  return !overflow ? sum : x < 0 ? MIN : MAX;  \
+}
+#define DEF_SAT_S_ADD_FMT_4_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y)
 #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y)
 
@@ -162,6 +173,9 @@ sat_s_add_##T##_fmt_3 (T x, T y)   \
 #define RUN_SAT_S_ADD_FMT_3(T, x, y) sat_s_add_##T##_fmt_3(x, y)
 #define RUN_SAT_S_ADD_FMT_3_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_3(T, x, y)
 
+#define RUN_SAT_S_ADD_FMT_4(T, x, y) sat_s_add_##T##_fmt_4(x, y)
+#define RUN_SAT_S_ADD_FMT_4_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_4(T, x, y)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c
new file mode 100644
index 000..0923764cde4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int8_t_fmt_4:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** xori\s+[atx][0-9]+,\s*[atx][0-

[PATCH v1 1/2] RISC-V: Add testcases for form 3 of signed scalar SAT_ADD

2024-09-19 Thread pan2 . li
From: Pan Li 

This patch would like to add testcases of the signed scalar SAT_ADD
for form 3.  Aka:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3 (int64_t, uint64_t, INT64_MIN, INT64_MAX)

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_add-10.c: New test.
* gcc.target/riscv/sat_s_add-11.c: New test.
* gcc.target/riscv/sat_s_add-12.c: New test.
* gcc.target/riscv/sat_s_add-9.c: New test.
* gcc.target/riscv/sat_s_add-run-10.c: New test.
* gcc.target/riscv/sat_s_add-run-11.c: New test.
* gcc.target/riscv/sat_s_add-run-12.c: New test.
* gcc.target/riscv/sat_s_add-run-9.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 
 gcc/testsuite/gcc.target/riscv/sat_s_add-10.c | 32 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-11.c | 31 ++
 gcc/testsuite/gcc.target/riscv/sat_s_add-12.c | 29 +
 gcc/testsuite/gcc.target/riscv/sat_s_add-9.c  | 30 +
 .../gcc.target/riscv/sat_s_add-run-10.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-11.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-12.c   | 16 ++
 .../gcc.target/riscv/sat_s_add-run-9.c| 16 ++
 9 files changed, 200 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-9.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index b4fbf5dc662..ab141bb1779 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -142,12 +142,26 @@ sat_s_add_##T##_fmt_2 (T x, T y) \
   return x < 0 ? MIN : MAX;  \
 }
 
+#define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
+T __attribute__((noinline))\
+sat_s_add_##T##_fmt_3 (T x, T y)   \
+{  \
+  T sum;   \
+  bool overflow = __builtin_add_overflow (x, y, &sum); \
+  return overflow ? x < 0 ? MIN : MAX : sum;   \
+}
+#define DEF_SAT_S_ADD_FMT_3_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y)
 #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y)
 
 #define RUN_SAT_S_ADD_FMT_2(T, x, y) sat_s_add_##T##_fmt_2(x, y)
 #define RUN_SAT_S_ADD_FMT_2_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_2(T, x, y)
 
+#define RUN_SAT_S_ADD_FMT_3(T, x, y) sat_s_add_##T##_fmt_3(x, y)
+#define RUN_SAT_S_ADD_FMT_3_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_3(T, x, y)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c
new file mode 100644
index 000..45329619f9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int16_t_fmt_3:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+*

[PATCH v5 3/4] Match: Support form 3 for scalar signed integer .SAT_ADD

2024-09-18 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if the backend
implements the ssadd3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto ; [50.00%]
  22   │   else
  23   │ goto ; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto ; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 3 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4aa610e2270..fdb59ff0d44 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3241,6 +3241,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
@2)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
+/* Signed saturation add, case 4:
+   Z = .ADD_OVERFLOW (X, Y)
+   SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (realpart @2))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v5 4/4] RISC-V: Fix vector SAT_ADD dump check due to middle-end change

2024-09-18 Thread pan2 . li
From: Pan Li 

This patch fixes the expected dump check counts of vector SAT_ADD.  The
middle-end change increases the number of matches from 2 to 4.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust
the dump check times from 2 to 4.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +-
 16 files changed, 16 insertions(+), 16 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
index c525ba97c52..47dd5012cc6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint8_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
index 41372d08e52..df8d5a8d275 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
index dddebb54426..f286bd10e4b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
index ad5162d10a0..307ff36cc35 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c
index 39c20b3cea6..3218962724c 10064

[PATCH v5 2/4] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi

2024-09-18 Thread pan2 . li
From: Pan Li 

This patch leverages match_cond_with_binary_phi to match the phi on
cond and to get the true/false args if matched.  This greatly
simplifies the implementation of gen_phi_on_cond.

Before this patch:
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ?
_pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1))
? _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
&& EDGE_COUNT (_other_db_1->succs) == 1
&& EDGE_PRED (_other_db_1, 0)->src == _db_1)
{
  tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
  tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
  tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
  bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & 
EDGE_TRUE_VALUE;
  tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
  tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
...

After this patch:
basic_block _b1 = gimple_bb (_a1);
tree _p1, _p2;
gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2);
if (_cond_1 && _p1 && _p2)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_cond_1);
tree _cond_rhs_1 = gimple_cond_rhs (_cond_1);
tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the
match_cond_with_binary_phi API to get cond gimple, true and
false TREE arg.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc | 67 +++--
 1 file changed, 15 insertions(+), 52 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f1ff1d18265..149458fffe1 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int 
depth)
 void
 dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
 {
-  fprintf_indent (f, indent,
-"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
-
-  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+  char opname_0[20];
+  char opname_1[20];
+  char opname_2[20];
 
-  indent += 2;
-  fprintf_indent (f, indent, "{\n");
-  indent += 2;
+  gen_opname (opname_0, 0);
+  gen_opname (opname_1, 1);
+  gen_opname (opname_2, 2);
 
   fprintf_indent (f, indent,
-"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _db_%d = safe_dyn_cast  (*gsi_last_bb (_pb_0_%d)) ? "
-"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+  fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2);
   fprintf_indent (f, indent,
-"basic_block _other_db_%d = safe_dyn_cast  "
-"(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
-depth, depth, depth, depth);
+"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n",
+depth, depth, opname_1, opname_2);
 
-  fprintf_indent (f, indent,
-"gcond *_ct_%d = safe_dyn_cast  (*gsi_last_bb (_db_%d));\n",
-depth, depth);
-  fprintf_indent (f, indent, "if (_ct_%d"
-" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
-  fprintf_indent (f, indent,
-"  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
-  fprintf_indent (f, indent,
-"  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
+  fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n",
+depth, opname_1, opname_2);
 
   indent += 2;
   fprintf_indent (f, indent, "{\n");
   indent += 2;
 
   fprintf_indent (f, indent,
-"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
-
-  char opname_0[20];
-  char opname_1[20];
-  char opname_2[20];
-  gen_opname (opname_0, 0);
-
+"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree %s = build2 (gimple_cond_code (_ct_%d), "
+"tree %s = build2 (gimple_cond_code (_cond_%d), "
 "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
 opname_0, depth, depth, depth);
 
-  fprintf_indent (f, indent,
-"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags"
-" & EDGE_TRUE_VALUE;\n", depth, depth);
-
-  ge

[PATCH v5 1/4] Match: Add interface match_cond_with_binary_phi for true/false arg

2024-09-18 Thread pan2 . li
From: Pan Li 

When matching a cond against a 2-arg phi node, we need to figure out
which arg of the phi node comes from the true edge of the cond block
and which comes from the false edge.  This patch adds an interface
that does so and returns the true and false args as TREE values.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* gimple-match-head.cc (match_cond_with_binary_phi): Add new func
impl to match binary phi for true and false arg.

Signed-off-by: Pan Li 
---
 gcc/gimple-match-head.cc | 120 +++
 1 file changed, 120 insertions(+)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 924d3f1e710..b63b66e9485 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -375,3 +375,123 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
 return true;
   return false;
 }
+
+/*
+ * Return the relevant gcond * of the given phi, as well as the true
+ * and false TREE args of the phi.  Or return nullptr.
+ *
+ * If matched the gcond *, the output argument TREE true_arg and false_arg
+ * will be updated to the relevant args of phi.
+ *
+ * If failed to match, nullptr gcond * will be returned, as well as the output
+ * arguments will be set to NULL_TREE.
+ */
+
+static inline gcond *
+match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg)
+{
+  *true_arg = *false_arg = NULL_TREE;
+
+  if (gimple_phi_num_args (phi) != 2)
+return nullptr;
+
+  basic_block pred_b0 = EDGE_PRED (gimple_bb (phi), 0)->src;
+  basic_block pred_b1 = EDGE_PRED (gimple_bb (phi), 1)->src;
+  edge edge_for_pred_0 = nullptr;
+
+  if (EDGE_COUNT (pred_b0->succs) == 2
+  && EDGE_COUNT (pred_b1->succs) == 1
+  && EDGE_COUNT (pred_b1->preds) == 1
+  && pred_b0 == EDGE_PRED (gimple_bb (phi), 0)->src)
+/*
+ * +--+
+ * | b0:  |
+ * | def  |   +-+
+ * | ...  |   | b1: |
+ * | cond |-->| def |
+ * +--+   | ... |
+ *|   +-+
+ *#  |
+ *|  |
+ *v  |
+ * +-+   |
+ * | b2: |   |
+ * | def |<--+
+ * +-+
+ * #: edge_for_pred_0.
+ */
+edge_for_pred_0 = EDGE_PRED (gimple_bb (phi), 0);
+  else if (EDGE_COUNT (pred_b1->succs) == 2
+  && EDGE_COUNT (pred_b0->succs) == 1
+  && EDGE_COUNT (pred_b0->preds) == 1
+  && pred_b1 == EDGE_PRED (pred_b0, 0)->src)
+/*
+ *+--+
+ *| b1:  |
+ * +-+| def  |
+ * | b0: || ...  |
+ * | def |<---#---| cond |
+ * | ... |+--+
+ * +-+   |
+ *|  |
+ *|  |
+ *|  |
+ *v  |
+ * +-+   |
+ * | b2: |   |
+ * | def |<--+
+ * +-+
+ * #: edge_for_pred_0.
+ */
+edge_for_pred_0 = EDGE_PRED (pred_b0, 0);
+  else if (EDGE_COUNT (pred_b0->succs) == 1
+  && EDGE_COUNT (pred_b1->succs) == 1
+  && EDGE_COUNT (pred_b0->preds) == 1
+  && EDGE_COUNT (pred_b1->preds) == 1
+  && EDGE_COUNT (EDGE_PRED (pred_b0, 0)->src->succs) == 2
+  && EDGE_PRED (pred_b0, 0)->src == EDGE_PRED (pred_b1, 0)->src)
+/* +--+
+ * | b0:  |
+ * | ...  |   +-+
+ * | cond |-->| b2: |
+ * +--+   | ... |
+ *|   +-+
+ *#  |
+ *|  |
+ *v  |
+ * +-+   |
+ * | b1: |   |
+ * | ... |   |
+ * +-+   |
+ *|  |
+ *|  |
+ *v  |
+ * +-+   |
+ * | b3: |<--+
+ * | ... |
+ * +-+
+ * #: edge_for_pred_0.
+ */
+edge_for_pred_0 = EDGE_PRED (pred_b0, 0);
+
+  if (!edge_for_pred_0)
+return nullptr;
+
+  gcond *cond = safe_dyn_cast  (*gsi_last_bb (edge_for_pred_0->src));
+
+  if (!cond)
+return nullptr;
+
+  if (edge_for_pred_0->flags & EDGE_TRUE_VALUE)
+{
+  *true_arg = gimple_phi_arg_def (phi, 0);
+  *false_arg = gimple_phi_arg_def (phi, 1);
+}
+  else /* Aka edge_for_pred_0->flags & EDGE_FALSE_VALUE  */
+{
+  *false_arg = gimple_phi_arg_def (phi, 0);
+  *true_arg = gimple_phi_arg_def (phi, 1);
+}
+
+  return cond;
+}
-- 
2.43.0



[PATCH v1] Match: Remove unnecessary types_match for case 1 of signed SAT_ADD

2024-09-12 Thread pan2 . li
From: Pan Li 

All commutative binary operators require both operands to have
matching types.  Thus, remove the types_match check for case 1 of
the signed SAT_ADD, because (bit_xor @0 @1) already ensures that
the operands have the correct TREE type.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Remove the types_match check for signed SAT_ADD
case 1.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4cef965c9c7..5566c0e4c41 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3204,8 +3204,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
integer_zerop)
(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
@2)
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
-  && types_match (type, @0, @1
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
 /* Signed saturation add, case 2:
T sum = (T)((UT)X + (UT)Y)
-- 
2.43.0



[PATCH v1] RISC-V: Add testcases for form 2 of signed scalar SAT_ADD

2024-09-12 Thread pan2 . li
From: Pan Li 

This patch adds testcases for form 2 of the signed scalar SAT_ADD.
That is:

Form 2:
  #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_2 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
  return sum;  \
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_ADD_FMT_2 (int64_t, uint64_t, INT64_MIN, INT64_MAX)

The below test suite is passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_add-5.c: New test.
* gcc.target/riscv/sat_s_add-6.c: New test.
* gcc.target/riscv/sat_s_add-7.c: New test.
* gcc.target/riscv/sat_s_add-8.c: New test.
* gcc.target/riscv/sat_s_add-run-5.c: New test.
* gcc.target/riscv/sat_s_add-run-6.c: New test.
* gcc.target/riscv/sat_s_add-run-7.c: New test.
* gcc.target/riscv/sat_s_add-run-8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 13 
 gcc/testsuite/gcc.target/riscv/sat_s_add-5.c  | 30 +
 gcc/testsuite/gcc.target/riscv/sat_s_add-6.c  | 32 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-7.c  | 31 ++
 gcc/testsuite/gcc.target/riscv/sat_s_add-8.c  | 29 +
 .../gcc.target/riscv/sat_s_add-run-5.c| 16 ++
 .../gcc.target/riscv/sat_s_add-run-6.c| 16 ++
 .../gcc.target/riscv/sat_s_add-run-7.c| 16 ++
 .../gcc.target/riscv/sat_s_add-run-8.c| 16 ++
 9 files changed, 199 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a8672f66322..b4fbf5dc662 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -132,9 +132,22 @@ sat_s_add_##T##_fmt_1 (T x, T y) \
 #define DEF_SAT_S_ADD_FMT_1_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_add_##T##_fmt_2 (T x, T y) \
+{\
+  T sum = (UT)x + (UT)y; \
+  if ((x ^ y) < 0 || (sum ^ x) >= 0) \
+return sum;  \
+  return x < 0 ? MIN : MAX;  \
+}
+
 #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y)
 #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y)
 
+#define RUN_SAT_S_ADD_FMT_2(T, x, y) sat_s_add_##T##_fmt_2(x, y)
+#define RUN_SAT_S_ADD_FMT_2_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_2(T, x, y)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c
new file mode 100644
index 000..b644022eb4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int8_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slliw\s+a0,\s*a0,\s*24
+** sraiw\s+a0,\s*a0,\s*24
+** ret
+*/
+DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-ti

[PATCH v1] RISC-V: Fix signed SAT_ADD test case for int64_t

2024-09-12 Thread pan2 . li
From: Pan Li 

The int8_t test for signed SAT_ADD is sat_s_add-1.c, whereas sat_s_add-4.c
should be for int64_t.  Thus, update sat_s_add-4.c to test the int64_t type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_s_add-4.c: Update test for int64_t
instead of int8_t.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
index f85675c1a05..12c9540eaec 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
@@ -5,26 +5,25 @@
 #include "sat_arith.h"
 
 /*
-** sat_s_add_int8_t_fmt_1:
+** sat_s_add_int64_t_fmt_1:
 ** add\s+[atx][0-9]+,\s*a0,\s*a1
 ** xor\s+[atx][0-9]+,\s*a0,\s*a1
 ** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
-** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
-** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
 ** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
-** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
 ** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
-** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** li\s+[atx][0-9]+,\s*-1
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** neg\s+[atx][0-9]+,\s*[atx][0-9]+
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
-** slliw\s+a0,\s*a0,\s*24
-** sraiw\s+a0,\s*a0,\s*24
 ** ret
 */
-DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
 
 /* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
-- 
2.43.0



[PATCH v4 4/4] RISC-V: Fix vector SAT_ADD dump check due to middle-end change

2024-09-12 Thread pan2 . li
From: Pan Li 

This patch fixes the expected dump match count of vector SAT_ADD.  The
middle-end change raises the number of matches from 2 to 4.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust
the dump check times from 2 to 4.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +-
 16 files changed, 16 insertions(+), 16 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
index c525ba97c52..47dd5012cc6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint8_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
index 41372d08e52..df8d5a8d275 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
index dddebb54426..f286bd10e4b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
index ad5162d10a0..307ff36cc35 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c
index 39c20b3cea6..3218962724c 10064

[PATCH v4 3/4] Match: Support form 3 for scalar signed integer .SAT_ADD

2024-09-12 Thread pan2 . li
From: Pan Li 

This patch adds support for form 3 of the scalar signed
integer .SAT_ADD, as in the example below:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if the backend
implements the ssadd<m>3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 3 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4cef965c9c7..167b1b106dd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3237,6 +3237,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
@2)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
+/* Signed saturation add, case 4:
+   Z = .ADD_OVERFLOW (X, Y)
+   SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (realpart @2))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v4 2/4] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi

2024-09-12 Thread pan2 . li
From: Pan Li 

This patch leverages match_cond_with_binary_phi to match the phi on
cond and to get the true/false args if matched.  This greatly
simplifies the implementation of gen_phi_on_cond.

Before this patch:
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ?
_pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1))
? _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
&& EDGE_COUNT (_other_db_1->succs) == 1
&& EDGE_PRED (_other_db_1, 0)->src == _db_1)
{
  tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
  tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
  tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
  bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & 
EDGE_TRUE_VALUE;
  tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
  tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
...

After this patch:
basic_block _b1 = gimple_bb (_a1);
tree _p1, _p2;
gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2);
if (_cond_1 && _p1 && _p2)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_cond_1);
tree _cond_rhs_1 = gimple_cond_rhs (_cond_1);
tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the
match_cond_with_binary_phi API to get cond gimple, true and
false TREE arg.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc | 67 +++--
 1 file changed, 15 insertions(+), 52 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index a56bd90cb2c..e3d2ecc6266 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int 
depth)
 void
 dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
 {
-  fprintf_indent (f, indent,
-"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
-
-  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+  char opname_0[20];
+  char opname_1[20];
+  char opname_2[20];
 
-  indent += 2;
-  fprintf_indent (f, indent, "{\n");
-  indent += 2;
+  gen_opname (opname_0, 0);
+  gen_opname (opname_1, 1);
+  gen_opname (opname_2, 2);
 
   fprintf_indent (f, indent,
-"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _db_%d = safe_dyn_cast  (*gsi_last_bb (_pb_0_%d)) ? "
-"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+  fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2);
   fprintf_indent (f, indent,
-"basic_block _other_db_%d = safe_dyn_cast  "
-"(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
-depth, depth, depth, depth);
+"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n",
+depth, depth, opname_1, opname_2);
 
-  fprintf_indent (f, indent,
-"gcond *_ct_%d = safe_dyn_cast  (*gsi_last_bb (_db_%d));\n",
-depth, depth);
-  fprintf_indent (f, indent, "if (_ct_%d"
-" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
-  fprintf_indent (f, indent,
-"  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
-  fprintf_indent (f, indent,
-"  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
+  fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n",
+depth, opname_1, opname_2);
 
   indent += 2;
   fprintf_indent (f, indent, "{\n");
   indent += 2;
 
   fprintf_indent (f, indent,
-"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
-
-  char opname_0[20];
-  char opname_1[20];
-  char opname_2[20];
-  gen_opname (opname_0, 0);
-
+"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree %s = build2 (gimple_cond_code (_ct_%d), "
+"tree %s = build2 (gimple_cond_code (_cond_%d), "
 "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
 opname_0, depth, depth, depth);
 
-  fprintf_indent (f, indent,
-"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags"
-" & EDGE_TRUE_VALUE;\n", depth, depth);
-
-  ge

[PATCH v4 1/4] Match: Add interface match_cond_with_binary_phi for true/false arg

2024-09-12 Thread pan2 . li
From: Pan Li 

When matching a cond against a 2-arg phi node, we need to figure out
which arg of the phi node comes from the true edge of the cond block
and which comes from the false edge.  This patch adds an interface
that does so and returns the true and false args as TREE values.

Some additional handling is needed if one of the args is an INTEGER_CST.
An INTEGER_CST arg may have no source block of its own, so its edge
source points to the condition block.  See line 31 of the example below:
the INTEGER_CST 255 has block 2 as its source.  Thus, we need to look at
the non-INTEGER_CST arg (aka _1) to tell which edge is the true/false
one.  For example, _1(3) takes block 3 as its source, which is the
dest of the false edge of the condition block.

   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_add_imm_type_check_uint8_t_fmt_2 (uint8_t x)
   6   │ {
   7   │   unsigned char _1;
   8   │   unsigned char _2;
   9   │   uint8_t _3;
  10   │   __complex__ unsigned char _5;
  11   │
  12   │ ;;   basic block 2, loop depth 0
  13   │ ;;pred:   ENTRY
  14   │   _5 = .ADD_OVERFLOW (x_4(D), 9);
  15   │   _2 = IMAGPART_EXPR <_5>;
  16   │   if (_2 != 0)
  17   │ goto <bb 4>; [35.00%]
  18   │   else
  19   │ goto <bb 3>; [65.00%]
  20   │ ;;succ:   3
  21   │ ;;4
  22   │
  23   │ ;;   basic block 3, loop depth 0
  24   │ ;;pred:   2
  25   │   _1 = REALPART_EXPR <_5>;
  26   │ ;;succ:   4
  27   │
  28   │ ;;   basic block 4, loop depth 0
  29   │ ;;pred:   2
  30   │ ;;3
  31   │   # _3 = PHI <255(2), _1(3)>
  32   │   return _3;
  33   │ ;;succ:   EXIT
  34   │
  35   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* gimple-match-head.cc (match_cond_with_binary_phi): Add new func
impl to match binary phi for true and false arg.

Signed-off-by: Pan Li 
---
 gcc/gimple-match-head.cc | 118 +++
 1 file changed, 118 insertions(+)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 924d3f1e710..6e7a3a0d62e 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -375,3 +375,121 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
 return true;
   return false;
 }
+
+/*
+ * Return the relevant gcond * of the given phi, as well as the true
+ * and false TREE args of the phi.  Or return NULL.
+ *
+ * If matched the gcond *, the output argument TREE true_arg and false_arg
+ * will be updated to the relevant args of phi.
+ *
+ * If failed to match, NULL gcond * will be returned, as well as the output
+ * arguments will be set to NULL_TREE.
+ */
+
+static inline gcond *
+match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg)
+{
+  *true_arg = *false_arg = NULL_TREE;
+
+  if (gimple_phi_num_args (phi) != 2
+  || EDGE_COUNT (gimple_bb (phi)->preds) != 2)
+return NULL;
+
+  basic_block pred_0 = EDGE_PRED (gimple_bb (phi), 0)->src;
+  basic_block pred_1 = EDGE_PRED (gimple_bb (phi), 1)->src;
+  basic_block cond_block = NULL;
+
+  if ((EDGE_COUNT (pred_0->succs) == 2 && EDGE_COUNT (pred_1->succs) == 1)
+ || (EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_1->succs) == 2))
+{
+  /* For below control flow graph:
+   *|
+   *v
+   * +--+
+   * | b0:  |
+   * | def  |   +-+
+   * | ...  |   | b1: |
+   * | cond |-->| def |
+   * +--+   | ... |
+   *|   +-+
+   *|  |
+   *v  |
+   * +-+   |
+   * | b2: |   |
+   * | def |<--+
+   * +-+
+   */
+  basic_block b0 = EDGE_COUNT (pred_0->succs) == 2 ? pred_0 : pred_1;
+  basic_block b1 = EDGE_COUNT (pred_0->succs) == 1 ? pred_0 : pred_1;
+
+  if (EDGE_COUNT (b1->preds) == 1 && EDGE_PRED (b1, 0)->src == b0)
+   cond_block = b0;
+}
+
+  if (EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_0->preds) == 1
+  && EDGE_COUNT (pred_1->succs) == 1 && EDGE_COUNT (pred_1->preds) == 1)
+{
+  /* For below control flow graph:
+   *|
+   *v
+   * +--+
+   * | b0:  |
+   * | ...  |   +-+
+   * | cond |-->| b2: |
+   * +--+   | ... |
+   *|   +-+
+   *|  |
+   *v  |
+   * +-+   |
+   * | b1: |   |
+   * | ... |   |
+   * +-+   |
+   *|  |
+   *|  |
+   *v  |
+   * +-+   |
+   * | b3: |<--+
+   * | ... |
+   * +-+
+   */
+  basic_block b0 = EDGE_PRED (pred_0, 0)->src;
+
+  if (EDGE_COUNT (b0->succs) == 2 && EDGE_PRED (pred_1, 0)->src == b0)
+   cond_block = b0;
+}
+

[PATCH v1] RISC-V: Implement SAT_ADD for signed integer vector

2024-09-11 Thread pan2 . li
From: Pan Li 

This patch would like to implement the ssadd for vector integer.  Aka
form 1 of ssadd vector.

Form 1:
  #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum = (UT)x + (UT)y; \
out[i] = (x ^ y) < 0   \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
vec_sat_s_add_int64_t_fmt_1:
  ...
  vsetvli  t1,zero,e64,m1,ta,mu
  vadd.vv  v3,v1,v2
  vxor.vv  v0,v1,v3
  vmslt.vi v0,v0,0
  vxor.vv  v2,v1,v2
  vmsge.vi v2,v2,0
  vmand.mm v0,v0,v2
  vsra.vx  v1,v1,t3
  vxor.vv  v3,v1,v4,v0.t
  ...

After this patch:
vec_sat_s_add_int64_t_fmt_1:
  ...
  vsetvli  a6,zero,e64,m1,ta,ma
  vsadd.vv v1,v1,v2
  ...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (ssadd<mode>3): Add new pattern for
signed integer vector SAT_ADD.
* config/riscv/riscv-protos.h (expand_vec_ssadd): Add new func
decl for vector ssadd expanding.
* config/riscv/riscv-v.cc (expand_vec_ssadd): Add new func impl
to expand vector ssadd pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: Add test
data for vector ssadd.
* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   |  11 +
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-v.cc   |   9 +
 .../riscv/rvv/autovec/binop/vec_sat_data.h| 264 ++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-1.c |  18 ++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-2.c |  18 ++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-3.c |  18 ++
 .../riscv/rvv/autovec/binop/vec_sat_s_add-4.c |  18 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-1.c   |  17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-2.c   |  17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-3.c   |  17 ++
 .../rvv/autovec/binop/vec_sat_s_add-run-4.c   |  17 ++
 .../riscv/rvv/autovec/vec_sat_arith.h |  25 ++
 13 files changed, 450 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-4.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a4e108268b4..a53c44659f0 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2684,6 +2684,17 @@ (define_expand "usadd<mode>3"
   }
 )
 
+(define_expand "ssadd<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_ssadd (operands[0], operands[1], op

[PATCH v3 5/5] RISC-V: Fix vector SAT_ADD dump check due to middle-end change

2024-09-10 Thread pan2 . li
From: Pan Li 

This patch would like to fix the dump check times of vector SAT_ADD.  The
middle-end change increases the number of matches from 2 to 4.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust
the dump check times from 2 to 4.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +-
 16 files changed, 16 insertions(+), 16 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
index c525ba97c52..47dd5012cc6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint8_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
index 41372d08e52..df8d5a8d275 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
index dddebb54426..f286bd10e4b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
index ad5162d10a0..307ff36cc35 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
@@ -15,4 +15,4 @@
 */
 DEF_VEC_SAT_U_ADD_FMT_6(uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c
index 39c20b3cea6..3218962724c 10064

[PATCH v3 2/5] Match: Add interface match_cond_with_binary_phi for true/false arg

2024-09-10 Thread pan2 . li
From: Pan Li 

When matching the cond with 2 args phi node, we need to figure out
which arg of phi node comes from the true edge of cond block, as
well as the false edge.  This patch would like to add interface
to perform the action and return the true and false arg in TREE type.

There will be some additional handling if one of the arg is INTEGER_CST.
Because an INTEGER_CST arg may have no source block of its own, its edge
source points to the condition block.  See below example in line 31,
the 255 INTEGER_CST has block 2 as source.  Thus, we need to find
the non-INTEGER_CST (aka _1) to tell which one is the true/false edge.
For example, the _1(3) takes block 3 as source, which is the dest
of false edge of the condition block.

   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_add_imm_type_check_uint8_t_fmt_2 (uint8_t x)
   6   │ {
   7   │   unsigned char _1;
   8   │   unsigned char _2;
   9   │   uint8_t _3;
  10   │   __complex__ unsigned char _5;
  11   │
  12   │ ;;   basic block 2, loop depth 0
  13   │ ;;pred:   ENTRY
  14   │   _5 = .ADD_OVERFLOW (x_4(D), 9);
  15   │   _2 = IMAGPART_EXPR <_5>;
  16   │   if (_2 != 0)
  17   │ goto <bb 4>; [35.00%]
  18   │   else
  19   │ goto <bb 3>; [65.00%]
  20   │ ;;succ:   3
  21   │ ;;4
  22   │
  23   │ ;;   basic block 3, loop depth 0
  24   │ ;;pred:   2
  25   │   _1 = REALPART_EXPR <_5>;
  26   │ ;;succ:   4
  27   │
  28   │ ;;   basic block 4, loop depth 0
  29   │ ;;pred:   2
  30   │ ;;3
  31   │   # _3 = PHI <255(2), _1(3)>
  32   │   return _3;
  33   │ ;;succ:   EXIT
  34   │
  35   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* gimple-match-head.cc (match_cond_with_binary_phi): Add new func
impl to match binary phi for true and false arg.

Signed-off-by: Pan Li 
---
 gcc/gimple-match-head.cc | 60 
 1 file changed, 60 insertions(+)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index c51728ae742..64f4f28cc72 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -490,3 +490,63 @@ match_control_flow_graph_case_1 (basic_block b3, 
basic_block *b_out)
   *b_out = b0;
   return true;
 }
+
+/*
+ * Return the relevant gcond * of the given phi, as well as the true
+ * and false TREE args of the phi.  Or return NULL.
+ *
+ * If matched the gcond *, the output argument TREE true_arg and false_arg
+ * will be updated to the relevant args of phi.
+ *
+ * If failed to match, NULL gcond * will be returned, as well as the output
+ * arguments will be set to NULL_TREE.
+ */
+
+static inline gcond *
+match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg)
+{
+  basic_block cond_block;
+  *true_arg = *false_arg = NULL_TREE;
+
+  if (gimple_phi_num_args (phi) != 2)
+return NULL;
+
+  if (!match_control_flow_graph_case_0 (gimple_bb (phi), &cond_block)
+  && !match_control_flow_graph_case_1 (gimple_bb (phi), &cond_block))
+return NULL;
+
+  gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (cond_block));
+
+  if (!cond || EDGE_COUNT (cond_block->succs) != 2)
+return NULL;
+
+  tree t0 = gimple_phi_arg_def (phi, 0);
+  tree t1 = gimple_phi_arg_def (phi, 1);
+  edge e0 = gimple_phi_arg_edge (phi, 0);
+  edge e1 = gimple_phi_arg_edge (phi, 1);
+
+  if (TREE_CODE (t0) == INTEGER_CST && TREE_CODE (t1) == INTEGER_CST)
+return NULL;
+
+  bool arg_0_cst_p = TREE_CODE (t0) == INTEGER_CST;
+  edge arg_edge = arg_0_cst_p ? e1 : e0;
+  tree arg = arg_0_cst_p ? t1 : t0;
+  tree other_arg = arg_0_cst_p ? t0 : t1;
+
+  edge cond_e0 = EDGE_SUCC (cond_block, 0);
+  edge cond_e1 = EDGE_SUCC (cond_block, 1);
+  edge matched_edge = arg_edge->src == cond_e0->dest ? cond_e0 : cond_e1;
+
+  if (matched_edge->flags & EDGE_TRUE_VALUE)
+{
+  *true_arg = arg;
+  *false_arg = other_arg;
+}
+  else
+{
+  *false_arg = arg;
+  *true_arg = other_arg;
+}
+
+  return cond;
+}
-- 
2.43.0



[PATCH v3 4/5] Match: Support form 3 for scalar signed integer .SAT_ADD

2024-09-10 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if the backend
implements the ssadd<m>3 pattern, similar to below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 3 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4cef965c9c7..167b1b106dd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3237,6 +3237,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
@2)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
 
+/* Signed saturation add, case 4:
+   Z = .ADD_OVERFLOW (X, Y)
+   SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (realpart @2))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))))
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v3 3/5] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi

2024-09-10 Thread pan2 . li
From: Pan Li 

This patch would like to leverage the match_cond_with_binary_phi to
match the phi on cond, and get the true/false arg if matched.  This
helps a lot to simplify the implementation of gen_phi_on_cond.

Before this patch:
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? 
_pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) 
? _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
&& EDGE_COUNT (_other_db_1->succs) == 1
&& EDGE_PRED (_other_db_1, 0)->src == _db_1)
{
  tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
  tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
  tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
  bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & 
EDGE_TRUE_VALUE;
  tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
  tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
...

After this patch:
basic_block _b1 = gimple_bb (_a1);
tree _p1, _p2;
gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2);
if (_cond_1 && _p1 && _p2)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_cond_1);
tree _cond_rhs_1 = gimple_cond_rhs (_cond_1);
tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, 
_cond_lhs_1, _cond_rhs_1);
...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the
match_cond_with_binary_phi API to get cond gimple, true and
false TREE arg.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc | 67 +++--
 1 file changed, 15 insertions(+), 52 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index a56bd90cb2c..e3d2ecc6266 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int 
depth)
 void
 dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
 {
-  fprintf_indent (f, indent,
-"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
-
-  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+  char opname_0[20];
+  char opname_1[20];
+  char opname_2[20];
 
-  indent += 2;
-  fprintf_indent (f, indent, "{\n");
-  indent += 2;
+  gen_opname (opname_0, 0);
+  gen_opname (opname_1, 1);
+  gen_opname (opname_2, 2);
 
   fprintf_indent (f, indent,
-"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
-"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+  fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2);
   fprintf_indent (f, indent,
-"basic_block _other_db_%d = safe_dyn_cast <gcond *> "
-"(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
-depth, depth, depth, depth);
+"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n",
+depth, depth, opname_1, opname_2);
 
-  fprintf_indent (f, indent,
-"gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n",
-depth, depth);
-  fprintf_indent (f, indent, "if (_ct_%d"
-" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
-  fprintf_indent (f, indent,
-"  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
-  fprintf_indent (f, indent,
-"  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
+  fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n",
+depth, opname_1, opname_2);
 
   indent += 2;
   fprintf_indent (f, indent, "{\n");
   indent += 2;
 
   fprintf_indent (f, indent,
-"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
-
-  char opname_0[20];
-  char opname_1[20];
-  char opname_2[20];
-  gen_opname (opname_0, 0);
-
+"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth);
   fprintf_indent (f, indent,
-"tree %s = build2 (gimple_cond_code (_ct_%d), "
+"tree %s = build2 (gimple_cond_code (_cond_%d), "
 "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
 opname_0, depth, depth, depth);
 
-  fprintf_indent (f, indent,
-"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags"
-" & EDGE_TRUE_VALUE;\n", depth, depth);
-
-  ge

[PATCH v3 1/5] Genmatch: Add control flow graph match for case 0 and case 1

2024-09-10 Thread pan2 . li
From: Pan Li 

Since day 1, gen_phi_on_cond has only supported the below control flow
for cond.  Aka:

+------+
| def  |
| ...  |   +-----+
| cond |-->| def |
+------+   | ... |
   |       +-----+
   |          |
   v          |
+-----+       |
| PHI |<------+
+-----+

Unfortunately, there will be more scenarios of control flow on PHI.
For example as below:

T __attribute__((noinline))\
sat_s_add_##T##_fmt_3 (T x, T y)   \
{  \
  T sum;   \
  bool overflow = __builtin_add_overflow (x, y, &sum); \
  return overflow ? x < 0 ? MIN : MAX : sum;   \
}

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

With the GIMPLE dump like below.
   3   │
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

The above code will have below control flow which is not supported by
the gen_phi_on_cond.

+------+
| def  |
| ...  |   +-----+
| cond |-->| def |
+------+   | ... |
   |       +-----+
   |          |
   v          |
+-----+       |
| def |       |
| ... |       |
+-----+       |
   |          |
   |          |
   v          |
+-----+       |
| PHI |<------+
+-----+

This patch would like to add support above control flow matching for
the gen_phi_on_cond.

The below testsuites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* gimple-match-head.cc (match_control_flow_graph_case_0): Add
new func impl to match case 0 of cfg.
(match_control_flow_graph_case_1): Ditto but for case 1.

Signed-off-by: Pan Li 
---
 gcc/gimple-match-head.cc | 115 +++
 1 file changed, 115 insertions(+)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 924d3f1e710..c51728ae742 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -375,3 +375,118 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
 return true;
   return false;
 }
+
+/*
+ * Return TRUE if the cfg matches the below layout by the given b2 in
+ * the first argument.  Or return FALSE.
+ *
+ * If return TRUE, the output argument b_out will be updated to the b0
+ * block as below example.
+ *
+ * If return FALSE, the output argument b_out will be NULL_BLOCK.
+ *
+ *|
+ *|
+ *v
+ * +--+
+ * | b0:  |
+ * | def  |   +-+
+ * | ...  |   | b1: |
+ * | cond |-->| def |
+ * +--+   | ... |
+ *|   +-+
+ *|  |
+ *v  |
+ * +-+   |
+ * | b2: |   |
+ * | def |<--+
+ * +-+
+ */
+static inline bool
+match_control_flow_graph_case_0 (basic_block b2, basic_block *b_out)
+{
+  *b_out = NULL;
+
+  if (EDGE_COUNT (b2->preds) != 2)
+return false;
+
+  basic_block pred_0 = EDGE_PRED (b2, 0)->src;
+  basic_block pred_1 = EDGE_PRED (b2, 1)->src;
+
+  if (pred_0 == NULL || pred_1 == NULL)
+return false;
+
+  if (!(EDGE_COUNT (pred_0->succs) == 2 && EDGE_COUNT (pred_1->succs) == 1)
+ && !(EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_1->succs) == 2))
+return false;
+
+  basic_block b0 = EDGE_COUNT (pred_0->succs) == 2 ? pred_0 : pred_1;
+  basic_block b1 = EDGE_COUNT (pred_0->succs) == 1 ? pred_0 : pred_1;
+
+  if (EDGE_COUNT (b1->preds) != 1 || EDGE_PRED (b1, 0)->src != b0)
+return false;
+
+  *b_out = b0;
+  return true;
+}
+
+/*
+ * Return TRUE if the cfg matches the below layout by the given b3 in
+ * the first argument.  Or return FALSE.
+ *
+ * If return TRUE, the output argument b_out will be updated to the b0
+ 

[PATCH v1] RISC-V: Fix asm check for Vector SAT_* due to middle-end change

2024-09-10 Thread pan2 . li
From: Pan Li 

The middle-end change affects the layout of the assembly generated for
vector SAT_*.  This patch would like to fix the asm checks and make them robust.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust
asm check and make it robust.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/b

[PATCH v2 2/2] Match: Support form 3 for scalar signed integer .SAT_ADD

2024-09-05 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if backend
implemented the ssadd3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 3 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 621306213e4..1d478d42ed5 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3207,6 +3207,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 3:
+   Z = .ADD_OVERFLOW (X, Y)
+   SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (realpart @2))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v2 1/2] Genmatch: Support control flow graph case 1 for phi on condition

2024-09-05 Thread pan2 . li
From: Pan Li 

The gen_phi_on_cond can only support below control flow for cond
from day 1.  Aka:

+--+
| def  |
| ...  |   +-+
| cond |-->| def |
+--+   | ... |
   |   +-+
   |  |
   v  |
+-+   |
| PHI |<--+
+-+

Unfortunately, there will be more scenarios of control flow on PHI.
For example as below:

T __attribute__((noinline))\
sat_s_add_##T##_fmt_3 (T x, T y)   \
{  \
  T sum;   \
  bool overflow = __builtin_add_overflow (x, y, &sum); \
  return overflow ? x < 0 ? MIN : MAX : sum;   \
}

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

With the GIMPLE dump like below.
   3   │
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

The above code will have below control flow which is not supported by
the gen_phi_on_cond.

+--+
| def  |
| ...  |   +-+
| cond |-->| def |
+--+   | ... |
   |   +-+
   |  |
   v  |
+-+   |
| def |   |
| ... |   |
+-+   |
   |  |
   |  |
   v  |
+-+   |
| PHI |<--+
+-+

This patch would like to add support above control flow for the
gen_phi_on_cond.  The generated match code looks like below.

Before this patch:
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ?
_pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1))
? _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
  && EDGE_COUNT (_other_db_1->succs) == 1
  && EDGE_PRED (_other_db_1, 0)->src == _db_1)
  {
...

After this patch:
basic_block _b1 = gimple_bb (_a1);
basic_block _b_cond_1;
if (gimple_phi_num_args (_a1) == 2
&& (control_flow_graph_case_0_match (_b1, &_b_cond_1)
|| control_flow_graph_case_1_match (_b1, &_b_cond_1)))
{
...

The below testsuites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_operand::gen_phi_on_cond): Add support control
flow graph case 1 for gen phi on condition.
* gimple-match-head.cc (control_flow_graph_case_0_match): Add
new func impl to match case 0 of cfg.
(control_flow_graph_case_1_match): Ditto but for case 1.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc  |  37 +
 gcc/gimple-match-head.cc | 115 +++
 2 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index a56bd90cb2c..e0ec1c0e928 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3518,43 +3518,36 @@ dt_operand::gen_phi_on_cond (FILE *f, int indent, int 
depth)
 {
   fprintf_indent (f, indent,
 "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+  fprintf_indent (f, indent, "basic_block _b_cond_%d;\n", depth);
 
-  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2\n", depth);
 
-  indent += 2;
-  fprintf_indent (f, indent, "{\n");
   indent += 2;
 
   fprintf_indent (f, indent,
-"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
-  fprintf_indent (f, indent,
-"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
-  fprintf_indent (f

[PATCH v1] RISC-V: Fix SAT_* dump check failure due to middle-end change.

2024-09-04 Thread pan2 . li
From: Pan Li 

Some middle-end changes may affect the number of times .SAT_* appears
in the dump.  Thus, refine the dump check for .SAT_* from scan-times to
scan, as we only care whether .SAT_* exists or not.  There will be
another PATCH to perform a similar refinement; this PATCH only
fixes the failed test cases.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Refine
the dump check from times to exist.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: 
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c   | 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c | 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c| 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c| 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c| 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c| 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c | 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c | 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c | 2 +-
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c | 2 +-
 .../gcc.tar

[PATCH v1 2/2] Match: Support form 3 for scalar signed integer .SAT_ADD

2024-09-04 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 3:
  #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_3 (T x, T y)   \
  {  \
T sum;   \
bool overflow = __builtin_add_overflow (x, y, &sum); \
return overflow ? x < 0 ? MIN : MAX : sum;   \
  }

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if backend
implemented the ssadd3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 3 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 1372f2ba377..1218abcd01a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3222,6 +3222,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 3:
+   Z = .ADD_OVERFLOW (X, Y)
+   SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   (realpart @2))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v1 1/2] Genmatch: Support new flow for phi on condition

2024-09-04 Thread pan2 . li
From: Pan Li 

The gen_phi_on_cond can only support below control flow for cond
from day 1.  Aka:

+--+
| def  |
| ...  |   +-+
| cond |-->| def |
+--+   | ... |
   |   +-+
   |  |
   v  |
+-+   |
| PHI |<--+
+-+

Unfortunately, there will be more scenarios of control flow on PHI.
For example as below:

T __attribute__((noinline))\
sat_s_add_##T##_fmt_3 (T x, T y)   \
{  \
  T sum;   \
  bool overflow = __builtin_add_overflow (x, y, &sum); \
  return overflow ? x < 0 ? MIN : MAX : sum;   \
}

DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

With the GIMPLE dump like below.
   3   │
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

The above code will have below control flow which is not supported by
the gen_phi_on_cond.

+--+
| def  |
| ...  |   +-+
| cond |-->| def |
+--+   | ... |
   |   +-+
   |  |
   v  |
+-+   |
| def |   |
| ... |   |
+-+   |
   |  |
   |  |
   v  |
+-+   |
| PHI |<--+
+-+

This patch would like to add support above control flow for the
gen_phi_on_cond.

The below testsuites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_operand::gen_phi_on_cond): Add support for
a new control flow when gen phi on condition.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc | 85 +++--
 1 file changed, 76 insertions(+), 9 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index a56bd90cb2c..f538df1be62 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3529,28 +3529,95 @@ dt_operand::gen_phi_on_cond (FILE *f, int indent, int 
depth)
 "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
   fprintf_indent (f, indent,
 "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
+
   fprintf_indent (f, indent,
-"basic_block _db_%d = safe_dyn_cast  (*gsi_last_bb (_pb_0_%d)) ? "
-"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+"gcond *_ct_0_%d = safe_dyn_cast  (*gsi_last_bb (_pb_0_%d));\n",
+depth, depth);
   fprintf_indent (f, indent,
-"basic_block _other_db_%d = safe_dyn_cast  "
-"(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
+"gcond *_ct_1_%d = safe_dyn_cast  (*gsi_last_bb (_pb_1_%d));\n",
+depth, depth);
+  fprintf_indent (f, indent,
+"gcond *_ct_a_%d = _ct_0_%d ? _ct_0_%d : _ct_1_%d;\n",
+depth, depth, depth, depth);
+  fprintf_indent (f, indent,
+"basic_block _db_%d = _ct_0_%d ? _pb_0_%d : _pb_1_%d;\n",
+depth, depth, depth, depth);
+  fprintf_indent (f, indent,
+"basic_block _other_db_%d = _ct_0_%d ? _pb_1_%d : _pb_0_%d;\n",
 depth, depth, depth, depth);
 
   fprintf_indent (f, indent,
-"gcond *_ct_%d = safe_dyn_cast  (*gsi_last_bb (_db_%d));\n",
-depth, depth);
-  fprintf_indent (f, indent, "if (_ct_%d"
+"edge _e_00_%d = _pb_0_%d->preds ? EDGE_PRED (_pb_0_%d, 0) : NULL;\n",
+depth, depth, depth);
+  fprintf_indent (f, indent,
+"basic_block _pb_00_%d = _e_00_%d ? _e_00_%d->src : NULL;\n",
+depth, depth, depth);
+  fprintf_indent (f, indent,
+"gcond *_ct_b_%d = _pb_00_%d ? "
+"safe_dyn_cast  (*gsi_last_bb (_pb_00_%d)) : NULL;\n",
+depth, depth, depth);
+
+  /* Case 1 flow for PHI.
+   * +--+
+   * | def  |
+   * | ...  |   +-+
+   * | cond |-->| def |
+   * +--+   | ... |
+   * 

[PATCH v1] Match: Support form 2 for scalar signed integer .SAT_ADD

2024-09-03 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 2 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 2:
  #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_2 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
   \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
  return sum;  \
   \
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

We can tell the difference before and after this patch if backend
implemented the ssadd3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t sum;
   8   │   unsigned char x.0_1;
   9   │   unsigned char y.1_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _22;
  19   │   signed char _23;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;pred:   ENTRY
  23   │   x.0_1 = (unsigned char) x_7(D);
  24   │   y.1_2 = (unsigned char) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int8_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _23 = ~_4;
  30   │   _22 = _5 & _23;
  31   │   if (_22 >= 0)
  32   │ goto <bb 4>; [42.57%]
  33   │   else
  34   │ goto <bb 3>; [57.43%]
  35   │ ;;succ:   4
  36   │ ;;3
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;pred:   2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (signed char) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 127;
  44   │ ;;succ:   4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;pred:   2
  48   │ ;;3
  49   │   # _6 = PHI <sum_9(2), _14(3)>
  50   │   return _6;
  51   │ ;;succ:   EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_ADD (x_7(D), y_8(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 2 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4298e89dad6..1372f2ba377 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3207,6 +3207,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 2:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ sum) & !(X ^ Y) >= 0 ? sum : (-(T)(X < 0) ^ MAX);
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ge (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
+ (nop_convert @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   @2
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
-- 
2.43.0



[PATCH v1] RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD

2024-09-02 Thread pan2 . li
From: Pan Li 

This patch would like to allow the IMM operand of the unsigned
scalar .SAT_ADD.  Like the operand 0, the operand 1 of .SAT_ADD
will be zero extended to Xmode before underlying code generation.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_usadd): Zero extend
the second operand of usadd as the first operand does.
* config/riscv/riscv.md (usadd3): Allow imm operand for
scalar usadd pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-11.c: Make asm check robust.
* gcc.target/riscv/sat_u_add-15.c: Ditto.
* gcc.target/riscv/sat_u_add-19.c: Ditto.
* gcc.target/riscv/sat_u_add-23.c: Ditto.
* gcc.target/riscv/sat_u_add-3.c: Ditto.
* gcc.target/riscv/sat_u_add-7.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 2 +-
 gcc/config/riscv/riscv.md | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d03e51f3a68..4061d2372b4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11970,7 +11970,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
   rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
-  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
   /* Step-1: sum = x + y  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3289ed2155a..4b0be43f436 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4358,8 +4358,8 @@ (define_insn_and_split ""
 
 (define_expand "usadd3"
   [(match_operand:ANYI 0 "register_operand")
-   (match_operand:ANYI 1 "register_operand")
-   (match_operand:ANYI 2 "register_operand")]
+   (match_operand:ANYI 1 "reg_or_int_operand")
+   (match_operand:ANYI 2 "reg_or_int_operand")]
   ""
   {
 riscv_expand_usadd (operands[0], operands[1], operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
index e248aeafa8e..bd830ececad 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_3:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
index bb8b991a84e..de615a6225e 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_4:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
index 7e4ae12f2f5..2b793e2f8fd 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_5:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
index 49bbb74a401..5de086e1138 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_6:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c
index cd15dc96450..bd7ccb2a8c7 100644
--- a/gcc/testsu

[PATCH v1 2/2] Match: Add int type fits check for form 2 of .SAT_SUB imm operand

2024-09-01 Thread pan2 . li
From: Pan Li 

This patch would like to add a strict check for the imm operand of .SAT_SUB
matching.  Previously we had no type checking for the imm operand, which
may result in unexpected IL being caught by the .SAT_SUB pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, 12);
uint8_t sum = .SAT_SUB (a, 12u);
uint8_t sum = .SAT_SUB (a, 126u);
uint8_t sum = .SAT_SUB (a, 128u);
uint8_t sum = .SAT_SUB (a, 228);
uint8_t sum = .SAT_SUB (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, -1);
uint8_t sum = .SAT_SUB (a, 256u);
uint8_t sum = .SAT_SUB (a, 257);

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++
 .../riscv/sat_u_add_imm_type_check-57.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-58.c   | 27 +++
 .../riscv/sat_u_add_imm_type_check-59.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-60.c   | 27 +++
 6 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-59.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-60.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 45e0cc4a54f..6c54f0502eb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (plus (max @0 INTEGER_CST@1) INTEGER_CST@2)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@1, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 75f48b4b760..4d11b6dcf3b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -281,6 +281,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
   return IMM > y ? IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\
+{ \
+  return x >= IMM ? x - IMM : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\
+{ \
+  return x > IMM ? x - IMM : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
new file mode 100644
index 000..1b193bcfb26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c
new file mode 100644
ind

[PATCH v1 1/2] Match: Add int type fits check for form 1 of .SAT_SUB imm operand

2024-09-01 Thread pan2 . li
From: Pan Li 

This patch adds a strict check for the imm operand of .SAT_SUB
matching.  Previously there was no type checking for the imm operand,
which may result in unexpected IL being caught by the .SAT_SUB pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (12, a);
uint8_t sum = .SAT_SUB (12u, a);
uint8_t sum = .SAT_SUB (126u, a);
uint8_t sum = .SAT_SUB (128u, a);
uint8_t sum = .SAT_SUB (228, a);
uint8_t sum = .SAT_SUB (223u, a);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (-1, a);
uint8_t sum = .SAT_SUB (256u, a);
uint8_t sum = .SAT_SUB (257, a);

The below test suites passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++
 .../riscv/sat_u_add_imm_type_check-53.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-54.c   | 27 +++
 .../riscv/sat_u_add_imm_type_check-55.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-56.c   | 27 +++
 6 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-55.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-56.c

diff --git a/gcc/match.pd b/gcc/match.pd
index be211535a49..45e0cc4a54f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@0, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a899979904b..75f48b4b760 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -267,6 +267,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
 #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\
+{ \
+  return IMM >= y ? IMM - y : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
+{ \
+  return IMM > y ? IMM - y : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
new file mode 100644
index 000..c959eeb0d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c 
b/gcc/testsuite/gc

[PATCH v1] Vect: Support form 1 of vector signed integer .SAT_ADD

2024-08-30 Thread pan2 . li
From: Pan Li 

This patch would like to support the vector signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)   \
  void __attribute__((noinline)) \
  vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n) \
  {  \
for (unsigned i = 0; i < n; i++) \
  {  \
T sum = (UT)x[i] + (UT)y[i]; \
out[i] = (x[i] ^ y[i]) < 0   \
  ? sum  \
  : (sum ^ x[i]) >= 0\
? sum\
: x[i] < 0 ? MIN : MAX;  \
  }  \
  }

DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

If the backend implements the vector mode of ssadd, we will see an IR diff
similar to the one below:

Before this patch:
 108   │   _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]);
 109   │   ivtmp_77 = _114 * 8;
 110   │   vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ...  }, 
_114, 0);
 111   │   vect__5.10_81 = VIEW_CONVERT_EXPR(vect__4.9_80);
 112   │   vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ...  }, 
_114, 0);
 113   │   vect__8.14_86 = VIEW_CONVERT_EXPR(vect__7.13_85);
 114   │   vect__9.15_87 = vect__5.10_81 + vect__8.14_86;
 115   │   vect_sum_20.16_88 = VIEW_CONVERT_EXPR(vect__9.15_87);
 116   │   vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85;
 117   │   vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88;
 118   │   mask__46.19_92 = vect__10.17_89 >= { 0, ... };
 119   │   _36 = vect__4.9_80 >> 63;
 120   │   mask__44.26_104 = vect__11.18_90 < { 0, ... };
 121   │   mask__43.27_105 = mask__46.19_92 & mask__44.26_104;
 122   │   _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, ... 
}, vect_sum_20.16_88);
 123   │   .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... }, _114, 0, _115);
 124   │   vectp_x.7_79 = vectp_x.7_78 + ivtmp_77;
 125   │   vectp_y.11_84 = vectp_y.11_83 + ivtmp_77;
 126   │   vectp_out.29_109 = vectp_out.29_108 + ivtmp_77;
 127   │   ivtmp_113 = ivtmp_112 - _114;

After this patch:
  94   │   # vectp_x.7_82 = PHI 
  95   │   # vectp_y.10_86 = PHI 
  96   │   # vectp_out.14_91 = PHI 
  97   │   # ivtmp_95 = PHI 
  98   │   _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]);
  99   │   ivtmp_81 = _97 * 8;
 100   │   vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ...  }, _97, 
0);
 101   │   vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ...  }, 
_97, 0);
 102   │   vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88);
 103   │   .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, 
vect_patt_40.13_89);
 104   │   vectp_x.7_83 = vectp_x.7_82 + ivtmp_81;
 105   │   vectp_y.10_87 = vectp_y.10_86 + ivtmp_81;
 106   │   vectp_out.14_92 = vectp_out.14_91 + ivtmp_81;
 107   │   ivtmp_96 = ivtmp_95 - _97;

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Add case 2 for the signed .SAT_ADD consumed by
vect pattern.
* tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new
matching func decl for signed .SAT_ADD.
(vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 17 +
 gcc/tree-vect-patterns.cc |  5 -
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index be211535a49..578c9dd5b77 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3207,6 +3207,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 2:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ sum) < 0 && (X ^ Y) >= 0 ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
+ (nop_convert @1
+  integer_zerop)
+  (ge (bit_xor:c @0 @1) integer_zerop))
+   (bit_xor:c (nop_convert (negate (nop_convert (convert
+ (lt @0 integer_zerop)
+  max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc

[PATCH v1] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

2024-08-30 Thread pan2 . li
From: Pan Li 

Previously, we had some special handling for both the .SAT_ADD and
.SAT_SUB for unsigned int.  Both need to take care of the zero extension
of SImode in RV64.  Thus merge these two helper functions into one to
avoid code duplication.

The below test suite passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge
the zero_extend handling from func riscv_gen_unsigned_xmode_reg.
(riscv_gen_unsigned_xmode_reg): Remove.
(riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx
instead of riscv_gen_unsigned_xmode_reg.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub-11.c: Adjust asm check.
* gcc.target/riscv/sat_u_sub-15.c: Ditto.
* gcc.target/riscv/sat_u_sub-19.c: Ditto.
* gcc.target/riscv/sat_u_sub-23.c: Ditto.
* gcc.target/riscv/sat_u_sub-27.c: Ditto.
* gcc.target/riscv/sat_u_sub-3.c: Ditto.
* gcc.target/riscv/sat_u_sub-31.c: Ditto.
* gcc.target/riscv/sat_u_sub-35.c: Ditto.
* gcc.target/riscv/sat_u_sub-39.c: Ditto.
* gcc.target/riscv/sat_u_sub-43.c: Ditto.
* gcc.target/riscv/sat_u_sub-47.c: Ditto.
* gcc.target/riscv/sat_u_sub-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 99 +--
 gcc/testsuite/gcc.target/riscv/sat_u_sub-11.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-15.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-19.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-23.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-27.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-3.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-31.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-35.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-39.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-43.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-47.c |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-7.c  |  4 +
 .../gcc.target/riscv/sat_u_sub_imm-11.c   |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15.c   |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-3.c|  2 +
 .../gcc.target/riscv/sat_u_sub_imm-3_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-3_2.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-7.c|  2 +
 .../gcc.target/riscv/sat_u_sub_imm-7_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-7_2.c  |  2 +
 25 files changed, 118 insertions(+), 53 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 496dd177fe7..75b37b53244 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
-/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
-   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
-   the new zero extended Xmode rtx will be returned.
-   Or the gen_lowpart rtx of Xmode will be returned.  */
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG and Xmode, the RTX x will be returned directly.
+
+   If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be
+   returned.
+
+   If rtx is const_int, a new REG rtx will be created to hold the value of
+   const_int and then returned.
+
+   According to the gccint doc, the constants generated for modes with fewer
+   bits than in HOST_WIDE_INT must be sign extended to full width.  Thus there
+   will be two cases here, take QImode as example.
+
+   For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
+   mov from const_int to the new REG rtx is good enough here.
+
+   For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
+   Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
+   of RV64.  So we need to cleanup the highest 56 bits of the new REG 

[PATCH v1 2/2] RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM

2024-08-29 Thread pan2 . li
From: Pan Li 

This patch adds test cases for the unsigned vector .SAT_ADD
when one of the operands is IMM.

Form 4:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_4(T, IMM)   \
  T __attribute__((noinline))   \
  vec_sat_u_add_imm##IMM##_##T##_fmt_4 (T *out, T *in, unsigned limit)  \
  { \
unsigned i; \
T ret;  \
for (i = 0; i < limit; i++) \
  { \
out[i] = __builtin_add_overflow (in[i], IMM, &ret) == 0 ? ret : -1; \
  } \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint64_t, 123)

The below tests passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c: New 
test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_u_add_imm-13.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-14.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-15.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-16.c  | 14 ++
 .../autovec/binop/vec_sat_u_add_imm-run-13.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-14.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-15.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-16.c  | 28 +++
 .../riscv/rvv/autovec/vec_sat_arith.h | 20 +
 9 files changed, 188 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
new file mode 100644
index 000..a9439dff39f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm9u_uint8_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*9
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint8_t, 9u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
new file mode 100644
index 000..dbe47497599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
new file mode 100644
index 000..0ac2e1b2942
--- /dev/null
+++ 

[PATCH v1 1/2] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

2024-08-29 Thread pan2 . li
From: Pan Li 

This patch adds test cases for the unsigned vector .SAT_ADD
when one of the operands is IMM.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123)

The below tests passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New 
test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_u_add_imm-10.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-11.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-12.c  | 14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-9.c   | 14 ++
 .../autovec/binop/vec_sat_u_add_imm-run-10.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-11.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-12.c  | 28 +++
 .../autovec/binop/vec_sat_u_add_imm-run-9.c   | 28 +++
 8 files changed, 168 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
new file mode 100644
index 000..b6b605ac615
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_3:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
new file mode 100644
index 000..6da86a1abe1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm33u_uint32_t_fmt_3:
+** ...
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint32_t, 33u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
new file mode 100644
index 000..b6ff5a6d5d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-

[PATCH v2 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-08-29 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUNC_FMT_2(NT, WT)      \
  NT __attribute__((noinline))              \
  sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
  {                                         \
    WT max = (WT)(NT)-1;                    \
    return x > max ? (NT) max : (NT)x;      \
  }

QUAD:
DEF_SAT_U_TRUNC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/sat_u_trunc-10.c | 17 
 .../gcc.target/riscv/sat_u_trunc-11.c | 17 
 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++
 6 files changed, 102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
new file mode 100644
index 000..5ea8e613901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
new file mode 100644
index 000..3b45e2af9ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
new file mode 100644
index 000..7ea2c93a301
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
new file mode 100644
index 000..2281610f335
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_2_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2)
+#define

[PATCH v2 2/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

2024-08-29 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_SAT_U_TRUNC_FMT_3(NT, WT)      \
  NT __attribute__((noinline))              \
  sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
  {                                         \
    WT max = (WT)(NT)-1;                    \
    return x <= max ? (NT)x : (NT) max;     \
  }

QUAD:
DEF_SAT_U_TRUNC_FMT_3 (uint16_t, uint64_t)
DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-16.c: New test.
* gcc.target/riscv/sat_u_trunc-17.c: New test.
* gcc.target/riscv/sat_u_trunc-18.c: New test.
* gcc.target/riscv/sat_u_trunc-run-16.c: New test.
* gcc.target/riscv/sat_u_trunc-run-17.c: New test.
* gcc.target/riscv/sat_u_trunc-run-18.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/sat_u_trunc-16.c | 17 
 .../gcc.target/riscv/sat_u_trunc-17.c | 17 
 .../gcc.target/riscv/sat_u_trunc-18.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-16.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-17.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-18.c | 16 +++
 6 files changed, 102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
new file mode 100644
index 000..f91da58c0ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
new file mode 100644
index 000..9813e1f79b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
new file mode 100644
index 000..eb799849f73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
new file mode 100644
index 000..20ceda6852e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_3_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2)
+#define

[PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-08-28 Thread pan2 . li
From: Pan Li 

This patch would like to support the scalar signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ mv   a5,a0
  12   │ add  a0,a0,a1
  13   │ xor  a1,a5,a1
  14   │ not  a1,a1
  15   │ xor  a4,a5,a0
  16   │ and  a1,a1,a4
  17   │ blt  a1,zero,.L5
  18   │ ret
  19   │ .L5:
  20   │ srai a5,a5,63
  21   │ li   a0,-1
  22   │ srli a0,a0,1
  23   │ xor  a0,a5,a0
  24   │ ret

After this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ add  a2,a0,a1
  12   │ xor  a1,a0,a1
  13   │ xor  a5,a0,a2
  14   │ srli a5,a5,63
  15   │ srli a1,a1,63
  16   │ xori a1,a1,1
  17   │ and  a5,a5,a1
  18   │ srai a4,a0,63
  19   │ li   a3,-1
  20   │ srli a3,a3,1
  21   │ xor  a3,a3,a4
  22   │ neg  a4,a5
  23   │ and  a3,a3,a4
  24   │ addi a5,a5,-1
  25   │ and  a0,a2,a5
  26   │ or   a0,a0,a3
  27   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
decl for expanding ssadd.
* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
impl to gen the max int rtx.
(riscv_expand_ssadd): Add new func impl to expand the ssadd.
* config/riscv/riscv.md (ssadd<mode>3): Add new pattern for
signed integer .SAT_ADD.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data.
* gcc.target/riscv/sat_s_add-1.c: New test.
* gcc.target/riscv/sat_s_add-2.c: New test.
* gcc.target/riscv/sat_s_add-3.c: New test.
* gcc.target/riscv/sat_s_add-4.c: New test.
* gcc.target/riscv/sat_s_add-run-1.c: New test.
* gcc.target/riscv/sat_s_add-run-2.c: New test.
* gcc.target/riscv/sat_s_add-run-3.c: New test.
* gcc.target/riscv/sat_s_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv.cc | 90 +++
 gcc/config/riscv/riscv.md | 11 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 17 
 .../gcc.target/riscv/sat_arith_data.h | 85 ++
 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c  | 30 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c  | 32 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c  | 31 +++
 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c  | 30 +++
 .../gcc.target/riscv/sat_s_add-run-1.c| 16 
 .../gcc.target/riscv/sat_s_add-run-2.c| 16 
 .../gcc.target/riscv/sat_s_add-run-3.c| 16 
 .../gcc.target/riscv/sat_s_add-run-4.c| 16 
 .../riscv/scalar_sat_binary_run_xxx.h | 26 ++
 14 files changed, 417 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 926899ccad6..3358e3887b9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -134,6 +134,7 @@ extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e9b1b9bc3ad..e2b28a278f6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11947,6 +11947,96 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
 

[PATCH v3] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD

2024-08-27 Thread pan2 . li
From: Pan Li 

The .SAT_ADD has 2 operands, and one of the operands may be an INTEGER_CST.
For example, _1 = .SAT_ADD (_2, 9) comes from the sample code below.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9)

It will fail to vectorize because vectorizable_call checks that the
operands are type compatible, but the imm will be (const_int 9) with
SImode, which is different from _2 (DImode).  Aka:

uint64_t _1;
uint64_t _2;
_1 = .SAT_ADD (_2, 9);

This patch would like to reconcile the imm operand to the operand type
mode of _2 by fold_convert to make the vectorizable_call happy.

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_sat_add_pattern): Add fold
convert for const_int to the type of operand 0.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: 
New test.

Signed-off-by: Pan Li 
---
 .../binop/vec_sat_u_add_imm_reconcile-1.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-10.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-11.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-12.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-13.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-14.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-15.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-2.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-3.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-4.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-5.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-6.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-7.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-8.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-9.c |  9 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 20 +++
 gcc/tree-vect-patterns.cc |  3 +++
 17 files changed, 158 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.

[PATCH v4] Match: Support form 1 for scalar signed integer .SAT_ADD

2024-08-27 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 1 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

We can tell the difference before and after this patch if backend
implemented the ssadd3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t sum;
   8   │   long unsigned int x.0_1;
   9   │   long unsigned int y.1_2;
  10   │   long unsigned int _3;
  11   │   long int _4;
  12   │   long int _5;
  13   │   int64_t _6;
  14   │   _Bool _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │   long int _14;
  18   │   long int _16;
  19   │   long int _17;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;pred:   ENTRY
  23   │   x.0_1 = (long unsigned int) x_7(D);
  24   │   y.1_2 = (long unsigned int) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int64_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _17 = ~_4;
  30   │   _16 = _5 & _17;
  31   │   if (_16 < 0)
  32   │ goto <bb 3>; [41.00%]
  33   │   else
  34   │ goto <bb 4>; [59.00%]
  35   │ ;;succ:   3
  36   │ ;;4
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;pred:   2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (long int) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 9223372036854775807;
  44   │ ;;succ:   4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;pred:   2
  48   │ ;;3
  49   │   # _6 = PHI <sum_9(2), _14(3)>
  50   │   return _6;
  51   │ ;;succ:   EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the above renamed
func from caller.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 15 +++
 gcc/tree-ssa-math-opts.cc | 35 ++-
 2 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 78f1957e8c7..09a36159163 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3192,6 +3192,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0
 
+/* Signed saturation add, case 1:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ sum) & !(X ^ Y) < 0 ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
+ (nop_convert @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..3c93fca5b53 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, 
tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
 

[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4

2024-08-27 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 4.  Aka:

Form 4:
  #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
  {   \
return x > (T)IMM ? x - (T)IMM : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_4(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-16.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-16.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h|  9 +++
 .../gcc.target/riscv/sat_u_sub_imm-13.c   | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-13_1.c | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-13_2.c | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-14.c   | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-14_1.c | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-14_2.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-15.c   | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-16.c   | 18 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-13.c   | 55 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-14.c   | 55 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-15.c   | 54 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-16.c   | 48 
 15 files changed, 421 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-16.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index b4339eb0dff..a899979904b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -238,6 +238,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   return (T)IMM > y ? (T)IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
+{   \
+  return x > (T)IMM ? x - (T)IMM : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -257,6 +264,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
new file mode 100644
index 000..7dcbc3b1a12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-opt

[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3

2024-08-27 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 3.  Aka:

Form 3:
  #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
  {   \
return (T)IMM > y ? (T)IMM - y : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-9.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h|  9 +++
 .../gcc.target/riscv/sat_u_sub_imm-10.c   | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-10_1.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-10_2.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-11.c   | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c | 22 
 .../gcc.target/riscv/sat_u_sub_imm-12.c   | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-9.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-9_1.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-9_2.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-10.c   | 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-11.c   | 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-12.c   | 48 
 .../gcc.target/riscv/sat_u_sub_imm-run-9.c| 56 +++
 15 files changed, 432 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index c8ff8320d82..b4339eb0dff 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -231,6 +231,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   return x >= (T)IMM ? x - (T)IMM : 0;  \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
+{   \
+  return (T)IMM > y ? (T)IMM - y : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -248,6 +255,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_1(y) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_2(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
new file mode 100644
index 000..db450d7cfbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options 

[PATCH v2] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD

2024-08-27 Thread pan2 . li
From: Pan Li 

The .SAT_ADD has 2 operands, and one of the operands may be an INTEGER_CST.
For example, _1 = .SAT_ADD (_2, 9) comes from the sample code below.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9)

It will fail to vectorize because vectorizable_call checks that the
operands are type compatible, but the imm will be (const_int 9) with
SImode, which is different from _2 (DImode).  Aka:

uint64_t _1;
uint64_t _2;
_1 = .SAT_ADD (_2, 9);

This patch would like to reconcile the imm operand to the operand type
mode of _2 if and only if there is no precision/data loss.  Aka convert
the imm 9 to the DImode for above example.

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_reconcile_cst_to_unsigned):
Add new func impl to reconcile the cst int type to given TREE type.
(vect_recog_sat_add_pattern): Reconcile the ops of .SAT_ADD
before building the gimple call.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: 
New test.

Signed-off-by: Pan Li 
---
 .../binop/vec_sat_u_add_imm_reconcile-1.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-10.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-11.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-12.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-13.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-14.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-15.c|  9 +
 .../binop/vec_sat_u_add_imm_reconcile-2.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-3.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-4.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-5.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-6.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-7.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-8.c |  9 +
 .../binop/vec_sat_u_add_imm_reconcile-9.c |  9 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 20 ++
 gcc/tree-vect-patterns.cc | 38 +++
 17 files changed, 193 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c
 create mode 100644 
gcc/testsuite/gcc.tar

[PATCH v1] RISC-V: Support IMM for operand 1 of ussub pattern

2024-08-26 Thread pan2 . li
From: Pan Li 

This patch would like to allow IMM for the operand 1 of ussub pattern.
Aka .SAT_SUB(x, 22) as the below example.

Form 2:
  #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
  {   \
return x >= (T)IMM ? x - (T)IMM : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1022)

It is almost the same as supporting imm for operand 0 of the ussub pattern,
but allows the second operand to be imm instead of the first operand.

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_ussub): Gen xmode for the
second operand, aka y in parameter.
* config/riscv/riscv.md (ussub<mode>3): Allow const_int for operand 2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-5.c: New test.
* gcc.target/riscv/sat_u_sub_imm-5_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-5_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-8.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-5.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-6.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-7.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc |  2 +-
 gcc/config/riscv/riscv.md |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h|  9 +++
 .../gcc.target/riscv/sat_u_sub_imm-5.c| 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-5_1.c  | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-5_2.c  | 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-6.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-6_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-6_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-7.c| 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-7_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-7_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-8.c| 18 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-5.c| 55 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-6.c| 55 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-7.c| 54 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-8.c| 48 
 17 files changed, 423 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-8.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 90a6e936558..1f544c1287e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11965,7 +11965,7 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
 {
   machine_mode mode = GET_MODE (dest);
   rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode);
-  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
   rtx xmode_minus = gen_reg_rtx (Xmode);
   rtx xmode_dest = gen_reg_rtx (Xmode);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index a94705a8e7c..3289ed2155a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4370,7 +4370,7 @@ (define_expand "usadd3"
 (define_expand "ussub3"
   [(match_operand:ANYI 0 "register_operand")
(match_operand:ANYI 1 "reg_or_int_operand")
-   (match_operand:ANYI 2 "register_operand")]
+   (match_operand:ANYI 2 "reg_or_int_operand")]
   ""
   {
 riscv_expand_ussub (operands[0], operands[1], operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.

[PATCH v2] Match: Add int type fits check for .SAT_ADD imm operand

2024-08-26 Thread pan2 . li
From: Pan Li 

This patch adds a strict check for the imm operand of .SAT_ADD
matching.  Previously there was no type checking for the imm operand,
which may result in unexpected IL being caught by the .SAT_ADD pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_ADD.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, 12);
uint8_t sum = .SAT_ADD (a, 12u);
uint8_t sum = .SAT_ADD (a, 126u);
uint8_t sum = .SAT_ADD (a, 128u);
uint8_t sum = .SAT_ADD (a, 228);
uint8_t sum = .SAT_ADD (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, -1);
uint8_t sum = .SAT_ADD (a, 256u);
uint8_t sum = .SAT_ADD (a, 257);

The test suites below pass with this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_ADD imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm.
* gcc.target/riscv/sat_u_add_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-10.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-11.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-12.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-13.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-14.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-15.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-16.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-17.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-18.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-19.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-20.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-21.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-22.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-23.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-24.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-25.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-26.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-27.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-28.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-29.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-30.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-31.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-32.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-33.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-34.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-35.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-36.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-37.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-38.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-39.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-40.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-41.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-42.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-43.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-44.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-45.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-46.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-47.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-48.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-49.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-50.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-51.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-52.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-6.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-7.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-8.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-9.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h   | 16 ++

[PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD

2024-08-25 Thread pan2 . li
From: Pan Li 

This patch adds support for form 1 of the scalar signed
integer .SAT_ADD, as in the example below:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

We can tell the difference before and after this patch if the backend
implements the ssadd3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t sum;
   8   │   long unsigned int x.0_1;
   9   │   long unsigned int y.1_2;
  10   │   long unsigned int _3;
  11   │   long int _4;
  12   │   long int _5;
  13   │   int64_t _6;
  14   │   _Bool _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │   long int _14;
  18   │   long int _16;
  19   │   long int _17;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;pred:   ENTRY
  23   │   x.0_1 = (long unsigned int) x_7(D);
  24   │   y.1_2 = (long unsigned int) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int64_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _17 = ~_4;
  30   │   _16 = _5 & _17;
  31   │   if (_16 < 0)
  32   │     goto <bb 3>; [41.00%]
  33   │   else
  34   │     goto <bb 4>; [59.00%]
  35   │ ;;succ:   3
  36   │ ;;4
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;pred:   2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (long int) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 9223372036854775807;
  44   │ ;;succ:   4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;pred:   2
  48   │ ;;3
  49   │   # _6 = PHI <sum_9(2), _14(3)>
  50   │   return _6;
  51   │ ;;succ:   EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the above renamed
func from caller.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 18 ++
 gcc/tree-ssa-math-opts.cc | 35 ++-
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 78f1957e8c7..b059e313415 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0
 
+/* Signed saturation add, case 1:
+   T sum = (UT)X + (UT)Y;
+   SAT_S_ADD = (X ^ Y) < 0
+ ? sum
+ : (sum ^ x) >= 0
+   ? sum
+   : x < 0 ? MIN : MAX;
+   T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
+   (convert @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..3c93fca5b53 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, 
tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, inte

[PATCH v1 2/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4

2024-08-24 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 4.  Aka:

Form 4:
  #define DEF_VEC_SAT_U_TRUNC_FMT_4(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
bool not_overflow = in[i] <= (WT)(NT)(-1);  \
out[i] = ((NT)in[i]) | (NT)((NT)not_overflow - 1);  \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_4 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/unop/vec_sat_u_trunc-19.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-20.c | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-21.c | 23 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-22.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-23.c | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-24.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-19.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-20.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-21.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-22.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-23.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-24.c | 16 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++
 13 files changed, 236 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c
new file mode 100644
index 000..a80cefe46ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint8_t_uint16_t_fmt_4:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_4 (uint8_t, uint16_t)
+
+/* 

[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4

2024-08-24 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 4.  Aka:

Form 4:
  #define DEF_SAT_U_TRUNC_FMT_4(NT, WT)  \
  NT __attribute__((noinline))   \
  sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x)  \
  {  \
bool not_overflow = x <= (WT)(NT)(-1);   \
return ((NT)x) | (NT)((NT)not_overflow - 1); \
  }

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-19.c: New test.
* gcc.target/riscv/sat_u_trunc-20.c: New test.
* gcc.target/riscv/sat_u_trunc-21.c: New test.
* gcc.target/riscv/sat_u_trunc-22.c: New test.
* gcc.target/riscv/sat_u_trunc-23.c: New test.
* gcc.target/riscv/sat_u_trunc-24.c: New test.
* gcc.target/riscv/sat_u_trunc-run-19.c: New test.
* gcc.target/riscv/sat_u_trunc-run-20.c: New test.
* gcc.target/riscv/sat_u_trunc-run-21.c: New test.
* gcc.target/riscv/sat_u_trunc-run-22.c: New test.
* gcc.target/riscv/sat_u_trunc-run-23.c: New test.
* gcc.target/riscv/sat_u_trunc-run-24.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++
 .../gcc.target/riscv/sat_u_trunc-19.c | 17 
 .../gcc.target/riscv/sat_u_trunc-20.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-21.c | 19 ++
 .../gcc.target/riscv/sat_u_trunc-22.c | 17 
 .../gcc.target/riscv/sat_u_trunc-23.c | 17 
 .../gcc.target/riscv/sat_u_trunc-24.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-19.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-20.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-21.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-22.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-23.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-24.c | 16 +++
 13 files changed, 218 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-21.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-22.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-23.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-24.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-19.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-20.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-21.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-22.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-23.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-24.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 91853b60f59..229e1f0a5cd 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -245,6 +245,15 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 }
 #define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT)
 
+#define DEF_SAT_U_TRUNC_FMT_4(NT, WT)  \
+NT __attribute__((noinline))   \
+sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x)  \
+{  \
+  bool not_overflow = x <= (WT)(NT)(-1);   \
+  return ((NT)x) | (NT)((NT)not_overflow - 1); \
+}
+#define DEF_SAT_U_TRUNC_FMT_4_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_4(NT, WT)
+
 #define RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_1(NT, WT, x)
 
@@ -254,4 +263,7 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_3 (x)
 #define RUN_SAT_U_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_3(NT, WT, x)
 
+#define RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_4 (x)
+#define RUN_SAT_U_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_4(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c
new file mode 100644
index 000..e61faffbbc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint16_t_to_uint8_t_fmt_4:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx]

[PATCH v1] Match: Add type check for .SAT_ADD imm operand

2024-08-24 Thread pan2 . li
From: Pan Li 

This patch adds a strict check for the imm operand of .SAT_ADD
matching.  Previously there was no type checking for the imm operand,
which may result in unexpected IL being caught by the .SAT_ADD pattern.

However, things become more complicated due to int promotion.
This means any const_int without a suffix will be promoted to int
before matching.  For example:

uint8_t a;
uint8_t sum = .SAT_ADD (a, 12);

The second operand will be (const_int 12) with int type when trying to
match .SAT_ADD.  Thus, to support int8/int16 .SAT_ADD, only the
int32 and int64 cases are strictly checked.

The test suites below pass with this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add strict type check for .SAT_ADD imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm.
* gcc.target/riscv/sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 11 ++-
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c |  4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c |  4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c |  4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-16.c |  4 ++--
 .../gcc.target/riscv/sat_u_add_imm_type_check-1.c |  9 +
 .../gcc.target/riscv/sat_u_add_imm_type_check-2.c |  9 +
 .../gcc.target/riscv/sat_u_add_imm_type_check-3.c |  9 +
 .../gcc.target/riscv/sat_u_add_imm_type_check-4.c |  9 +
 .../gcc.target/riscv/sat_u_add_imm_type_check-5.c |  9 +
 .../gcc.target/riscv/sat_u_add_imm_type_check-6.c |  9 +
 11 files changed, 72 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-6.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 65a3aae2243..f695790629e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3190,7 +3190,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop)
   integer_minus_onep (realpart @2))
   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-  && types_match (type, @0
+   && types_match (type, @0))
+   (with
+{
+ unsigned precision = TYPE_PRECISION (type);
+ unsigned int_precision = HOST_BITS_PER_INT;
+}
+/* The const_int will perform int promotion,  the const_int will have at
+   least the int_precision.  Thus, type less than int_precision will be
+   skipped the type match checking.  */
+(if (precision < int_precision || types_match (type, @1))
 
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
index 43f34b5f3c9..a246e9b1857 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
@@ -5,7 +5,7 @@
 #include "sat_arith.h"
 
 /*
-** sat_u_add_imm7_uint32_t_fmt_3:
+** sat_u_add_imm7u_uint32_t_fmt_3:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** addi\s+[atx][0-9]+,\s*a0,\s*7
@@ -17,6 +17,6 @@
 ** sext.w\s+a0,\s*a0
 ** ret
 */
-DEF_SAT_U_ADD_IMM_FMT_3(uint32_t, 7)
+DEF_SAT_U_ADD_IMM_FMT_3(uint32_t, 7u)
 
 /* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c
index 561c127f5fa..143f14c3af0 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c
@@ -5,13 +5,13 @@
 #include "sat_arith.h"
 
 /*
-** sat_u_add_imm8_uint64_t_fmt_3:
+** sat_u_add_imm8ull_uint64_t_fmt_3:
 ** addi\s+[atx][0-9]+,\s*a0,\s*8
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** neg\s+[atx][0-9]+,\s*[atx][0-9]+
 ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
 *

[PATCH v1 2/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3

2024-08-21 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_VEC_SAT_U_TRUNC_FMT_3(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT max = (WT)(NT)-1;\
out[i] = in[i] <= max ? (NT)in[i] : (NT)max;\
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_3 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/unop/vec_sat_u_trunc-13.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-14.c | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-15.c | 23 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-16.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-17.c | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-18.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-13.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-14.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-15.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-16.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-17.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-18.c | 16 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++
 13 files changed, 236 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c
new file mode 100644
index 000..49bdbdc3606
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint8_t_uint16_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_3 (uint8_t, uint16_t)
+
+/* 

[PATCH v1 1/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2

2024-08-21 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_VEC_SAT_U_TRUNC_FMT_2(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT max = (WT)(NT)-1;\
out[i] = in[i] > max ? (NT)max : (NT)in[i]; \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_2 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/unop/vec_sat_u_trunc-10.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-11.c | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-12.c | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-7.c  | 19 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-8.c  | 21 +
 .../rvv/autovec/unop/vec_sat_u_trunc-9.c  | 23 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-10.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-11.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-12.c | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-7.c  | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-8.c  | 16 +
 .../rvv/autovec/unop/vec_sat_u_trunc-run-9.c  | 16 +
 .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++
 13 files changed, 236 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c
new file mode 100644
index 000..f5084e503eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint16_t_uint32_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_2 (uint16_t, uint32_t)
+
+/* { dg-fin

[PATCH v1] RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]

2024-08-20 Thread pan2 . li
From: Pan Li 

Fix one typo `sat_truc` to `sat_trunc`, as well as `SAT_TRUC` to `SAT_TRUNC`.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Fix SAT_TRUNC typo.
* gcc.target/riscv/sat_u_trunc-1.c: Ditto.
* gcc.target/riscv/sat_u_trunc-13.c: Ditto.
* gcc.target/riscv/sat_u_trunc-14.c: Ditto.
* gcc.target/riscv/sat_u_trunc-15.c: Ditto.
* gcc.target/riscv/sat_u_trunc-2.c: Ditto.
* gcc.target/riscv/sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_u_trunc-4.c: Ditto.
* gcc.target/riscv/sat_u_trunc-5.c: Ditto.
* gcc.target/riscv/sat_u_trunc-6.c: Ditto.
* gcc.target/riscv/sat_u_trunc-7.c: Ditto.
* gcc.target/riscv/sat_u_trunc-8.c: Ditto.
* gcc.target/riscv/sat_u_trunc-9.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-1.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-13.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-14.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-15.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-2.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-3.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-4.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-5.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-6.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-7.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-8.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-9.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 30 +--
 .../gcc.target/riscv/sat_u_trunc-1.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-13.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-14.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-15.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-2.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-3.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-4.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-5.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-6.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-7.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-8.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-9.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-1.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-13.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-14.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-15.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-2.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-3.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-4.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-5.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-6.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-7.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-8.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-9.c  |  4 +--
 25 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index cf055410fd1..91853b60f59 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -218,40 +218,40 @@ sat_u_sub_##T##_fmt_12 (T x, T y)  \
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
 
-#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
+#define DEF_SAT_U_TRUNC_FMT_1(NT, WT)\
 NT __attribute__((noinline)) \
-sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
+sat_u_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
 {\
   bool overflow = x > (WT)(NT)(-1);  \
   return ((NT)x) | (NT)-overflow;\
 }
-#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
+#define DEF_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_1(NT, WT)
 
-#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
+#define DEF_SAT_U_TRUNC_FMT_2(NT, WT)\
 NT __attribute__((noinline)) \
-sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
+sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
 {\
   WT max = (WT)(NT)-1;   \
   return x > max ? (NT) max : (NT)x; \
 }
-#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
+#define DEF_SAT_U_TRUNC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_2(NT, WT)
 
-#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
+#define DEF_SAT_U_TRUNC_FMT_3(NT, WT)\
 NT __attribute__((noinline)) \
-sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
+sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 {\
   WT max = (WT)(NT)-1;   \
   return x <= max ? (NT)x : (NT) max;\
 }
-#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT)
+#define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT)
 
-#define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u

[PATCH v1] Match: Support form 4 for unsigned integer .SAT_TRUNC

2024-08-20 Thread pan2 . li
From: Pan Li 

This patch adds support for form 4 of the unsigned integer
.SAT_TRUNC, as shown in the example below:

Form 4:
  #define DEF_SAT_U_TRUC_FMT_4(NT, WT)   \
  NT __attribute__((noinline))   \
  sat_u_truc_##WT##_to_##NT##_fmt_4 (WT x)   \
  {  \
bool not_overflow = x <= (WT)(NT)(-1);   \
return ((NT)x) | (NT)((NT)not_overflow - 1); \
  }

DEF_SAT_U_TRUC_FMT_4(uint8_t, uint32_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x)
   6   │ {
   7   │   _Bool not_overflow;
   8   │   unsigned char _1;
   9   │   unsigned char _2;
  10   │   unsigned char _3;
  11   │   uint8_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;pred:   ENTRY
  15   │   not_overflow_5 = x_4(D) <= 255;
  16   │   _1 = (unsigned char) x_4(D);
  17   │   _2 = (unsigned char) not_overflow_5;
  18   │   _3 = _2 + 255;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;succ:   EXIT
  22   │
  23   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x)
   6   │ {
   7   │   uint8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add form 4 for unsigned .SAT_TRUNC matching.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..5a2f777e65f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3311,6 +3311,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
+/* Unsigned saturation truncate, case 3, sizeof (WT) > sizeof (NT).
+   SAT_U_TRUNC = (NT)X | ((NT)(X <= (WT)-1) + (NT)-1).  */
+(match (unsigned_integer_sat_trunc @0)
+ (bit_ior:c (plus:c (convert (le @0 INTEGER_CST@1)) INTEGER_CST@2)
+   (convert @0))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (with
+  {
+   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+   unsigned otype_precision = TYPE_PRECISION (type);
+   wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
+   wide_int max = wi::mask (otype_precision, false, otype_precision);
+   wide_int int_cst_1 = wi::to_wide (@1);
+   wide_int int_cst_2 = wi::to_wide (@2);
+  }
+  (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.43.0



[PATCH v3] RISC-V: Support IMM for operand 0 of ussub pattern

2024-08-18 Thread pan2 . li
From: Pan Li 

This patch allows an immediate (IMM) as operand 0 of the ussub pattern,
i.e. .SAT_SUB(1023, y), as in the example below.

Form 1:
  #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
  {   \
return (T)IMM >= y ? (T)IMM - y : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023)

Before this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ bgtu a0,a5,.L3
  13   │ sub a0,a5,a0
  14   │ ret
  15   │ .L3:
  16   │ li  a0,0
  17   │ ret

After this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ sltu a4,a5,a0
  13   │ addi a4,a4,-1
  14   │ sub a0,a5,a0
  15   │ and a0,a4,a0
  16   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new
func impl to gen xmode rtx reg from operand rtx.
(riscv_expand_ussub): Gen xmode reg for operand 1.
* config/riscv/riscv.md: Allow const_int for operand 1.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_u_sub_imm-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 46 ++-
 gcc/config/riscv/riscv.md |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 
 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_1.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_2.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 
 17 files changed, 477 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f266c45ed4d..5e6f3ba10e4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11893,6 +11893,50 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG,  the gen_lowpart of Xmode will be returned.
+
+   If rtx is const_int,  a new REG rtx will be created to hold the value of
+   const_int and then returned.
+
+   According to the gccint doc, the constants generated for modes with fewer
+   bits than in HOST_WIDE_INT 

[PATCH v2] Test: Move pr116278 run test to dg/torture [NFC]

2024-08-18 Thread pan2 . li
From: Pan Li 

Move the run test of pr116278 to dg/torture and leave only the RISC-V
asm check under the RISC-V part.

PR target/116278

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: Take compile instead of run.
* gcc.target/riscv/pr116278-run-2.c: Ditto.
* gcc.dg/torture/pr116278-run-1.c: New test.
* gcc.dg/torture/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.dg/torture/pr116278-run-1.c | 19 +++
 gcc/testsuite/gcc.dg/torture/pr116278-run-2.c | 19 +++
 .../gcc.target/riscv/pr116278-run-1.c |  2 +-
 .../gcc.target/riscv/pr116278-run-2.c |  2 +-
 4 files changed, 40 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116278-run-2.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
new file mode 100644
index 000..8e07fb6af29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int8_t b[1];
+int8_t *d = b;
+int32_t c;
+
+int main() {
+  b[0] = -40;
+  uint16_t t = (uint16_t)d[0];
+
+  c = (t < 0xFFF6 ? t : 0xFFF6) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
new file mode 100644
index 000..d85e21531e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int16_t b[1];
+int16_t *d = b;
+int64_t c;
+
+int main() {
+  b[0] = -40;
+  uint32_t t = (uint32_t)d[0];
+
+  c = (t < 0xFFF6u ? t : 0xFFF6u) + 9;
+
+  if (c != 4294967265)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
index d3812bdcdfb..c758fca7975 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
index 669cd4f003f..a4da8a323f0 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
 #include 
-- 
2.43.0



[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-08-18 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x > max ? (NT) max : (NT)x; \
  }

QUAD:
DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/sat_u_trunc-10.c | 17 
 .../gcc.target/riscv/sat_u_trunc-11.c | 17 
 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++
 6 files changed, 102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
new file mode 100644
index 000..7dfc740c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
new file mode 100644
index 000..c50ae96f47d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
new file mode 100644
index 000..61331cee6fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
new file mode 100644
index 000..4bc9303e457
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_2_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2)
+#define T 

[PATCH v1] Test: Move pr116278 run test to c-torture [NFC]

2024-08-18 Thread pan2 . li
From: Pan Li 

Move the run test of pr116278 to c-torture and leave only the RISC-V
asm check under the RISC-V part.

PR target/116278

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: Take compile instead of
run test.
* gcc.target/riscv/pr116278-run-2.c: Ditto.
* gcc.c-torture/execute/pr116278-run-1.c: New test.
* gcc.c-torture/execute/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.c-torture/execute/pr116278-run-1.c | 18 ++
 .../gcc.c-torture/execute/pr116278-run-2.c | 18 ++
 .../gcc.target/riscv/pr116278-run-1.c  |  2 +-
 .../gcc.target/riscv/pr116278-run-2.c  |  2 +-
 4 files changed, 38 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c
new file mode 100644
index 000..fa5340c9d58
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int8_t b[1];
+int8_t *d = b;
+int32_t c;
+
+int main() {
+  b[0] = -40;
+  uint16_t t = (uint16_t)d[0];
+
+  c = (t < 0xFFF6 ? t : 0xFFF6) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c
new file mode 100644
index 000..65439d614a1
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int16_t b[1];
+int16_t *d = b;
+int64_t c;
+
+int main() {
+  b[0] = -40;
+  uint32_t t = (uint32_t)d[0];
+
+  c = (t < 0xFFF6u ? t : 0xFFF6u) + 9;
+
+  if (c != 4294967265)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
index d3812bdcdfb..c758fca7975 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
index 669cd4f003f..a4da8a323f0 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
 #include 
-- 
2.43.0



[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

2024-08-17 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x <= max ? (NT)x : (NT) max;\
  }

QUAD:
DEF_SAT_U_TRUC_FMT_3 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-16.c: New test.
* gcc.target/riscv/sat_u_trunc-17.c: New test.
* gcc.target/riscv/sat_u_trunc-18.c: New test.
* gcc.target/riscv/sat_u_trunc-run-16.c: New test.
* gcc.target/riscv/sat_u_trunc-run-17.c: New test.
* gcc.target/riscv/sat_u_trunc-run-18.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/sat_u_trunc-16.c | 17 
 .../gcc.target/riscv/sat_u_trunc-17.c | 17 
 .../gcc.target/riscv/sat_u_trunc-18.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-16.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-17.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-18.c | 16 +++
 6 files changed, 102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
new file mode 100644
index 000..3ee7dc03ade
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
new file mode 100644
index 000..975853712cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
new file mode 100644
index 000..11e34ae6fd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
new file mode 100644
index 000..3edcf137a79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_3_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2)
+#define T 

[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-08-17 Thread pan2 . li
From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x > max ? (NT) max : (NT)x; \
  }

QUAD:
DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/sat_u_trunc-10.c | 17 
 .../gcc.target/riscv/sat_u_trunc-11.c | 17 
 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++
 6 files changed, 102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
new file mode 100644
index 000..7dfc740c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
new file mode 100644
index 000..c50ae96f47d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
new file mode 100644
index 000..61331cee6fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
new file mode 100644
index 000..4bc9303e457
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_2_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2)
+#define T 

[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3

2024-08-17 Thread pan2 . li
From: Pan Li 

This patch adds test cases for form 3 of the unsigned scalar
.SAT_TRUNC, i.e.:

Form 3:
  #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x <= max ? (NT)x : (NT) max;\
  }

DEF_SAT_U_TRUC_FMT_3 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-13.c: New test.
* gcc.target/riscv/sat_u_trunc-14.c: New test.
* gcc.target/riscv/sat_u_trunc-15.c: New test.
* gcc.target/riscv/sat_u_trunc-run-13.c: New test.
* gcc.target/riscv/sat_u_trunc-run-14.c: New test.
* gcc.target/riscv/sat_u_trunc-run-15.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++
 .../gcc.target/riscv/sat_u_trunc-13.c | 17 
 .../gcc.target/riscv/sat_u_trunc-14.c | 20 +++
 .../gcc.target/riscv/sat_u_trunc-15.c | 19 ++
 .../gcc.target/riscv/sat_u_trunc-run-13.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-14.c | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-15.c | 16 +++
 7 files changed, 116 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-15.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 576a4926d1f..cf055410fd1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -236,10 +236,22 @@ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
 }
 #define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
 
+#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
+NT __attribute__((noinline)) \
+sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
+{\
+  WT max = (WT)(NT)-1;   \
+  return x <= max ? (NT)x : (NT) max;\
+}
+#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT)
+
 #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
 
 #define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x)
 #define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x)
 
+#define RUN_SAT_U_TRUC_FMT_3(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_3 (x)
+#define RUN_SAT_U_TRUC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_3(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c
new file mode 100644
index 000..58910793a80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c
new file mode 100644
index 000..236ea1d45f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c
new file mode 100644
index 000.

[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2

2024-08-17 Thread pan2 . li
From: Pan Li 

This patch adds test cases for form 2 of the unsigned scalar
.SAT_TRUNC, i.e.:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x > max ? (NT) max : (NT)x; \
  }

DEF_SAT_U_TRUC_FMT_2 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-7.c: New test.
* gcc.target/riscv/sat_u_trunc-8.c: New test.
* gcc.target/riscv/sat_u_trunc-9.c: New test.
* gcc.target/riscv/sat_u_trunc-run-7.c: New test.
* gcc.target/riscv/sat_u_trunc-run-8.c: New test.
* gcc.target/riscv/sat_u_trunc-run-9.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++
 .../gcc.target/riscv/sat_u_trunc-7.c  | 17 
 .../gcc.target/riscv/sat_u_trunc-8.c  | 20 +++
 .../gcc.target/riscv/sat_u_trunc-9.c  | 19 ++
 .../gcc.target/riscv/sat_u_trunc-run-7.c  | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-8.c  | 16 +++
 .../gcc.target/riscv/sat_u_trunc-run-9.c  | 16 +++
 7 files changed, 116 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 37e0a60f21b..576a4926d1f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -227,7 +227,19 @@ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
 }
 #define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
 
+#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
+NT __attribute__((noinline)) \
+sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
+{\
+  WT max = (WT)(NT)-1;   \
+  return x > max ? (NT) max : (NT)x; \
+}
+#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
+
 #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
 
+#define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x)
+#define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c
new file mode 100644
index 000..95d513a15fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c
new file mode 100644
index 000..f168912293d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c
new file mode 100644
index 000..d82363d6aef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl

[PATCH v4] RISC-V: Make sure high bits of usadd operands is clean for non-Xmode [PR116278]

2024-08-16 Thread pan2 . li
From: Pan Li 

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
For example, consider the code below.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short is expanded to
sign_extend RTL as above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu compares 65505 with 0xffffffffffffffd8 here, but we
actually want to compare 65505 with 65496 (0xffd8).  Thus we need to
clear the high bits of the operand to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands
and sum.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: New test.
* gcc.target/riscv/pr116278-run-2.c: New test.

PR 116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands 0
and remove the special handing for SImode in RV64.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-11.c: Adjust asm check body.
* gcc.target/riscv/sat_u_add-15.c: Ditto.
* gcc.target/riscv/sat_u_add-19.c: Ditto.
* gcc.target/riscv/sat_u_add-23.c: Ditto.
* gcc.target/riscv/sat_u_add-3.c: Ditto.
* gcc.target/riscv/sat_u_add-7.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-7.c: Ditto.
* gcc.target/riscv/pr116278-run-1.c: New test.
* gcc.target/riscv/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 34 ---
 .../gcc.target/riscv/pr116278-run-1.c | 20 +++
 .../gcc.target/riscv/pr116278-run-2.c | 20 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  |  6 +++-
 .../gcc.target/riscv/sat_u_add_imm-11.c   |  6 +++-
 .../gcc.target/riscv/sat_u_add_imm-15.c   |  6 +++-
 .../gcc.target/riscv/sat_u_add_imm-3.c|  6 +++-
 .../gcc.target/riscv/sat_u_add_imm-7.c|  6 +++-
 13 files changed, 112 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1f60d8f9711..453a061428e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11801,12 +11801,29 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
+   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
+   the new zero extended Xmode rtx will be returned.
+   Or the gen_lowpart rtx of Xmode will be returned.  */
+
+static rtx
+riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
+{
+  if (mode == Xmode)
+return x;
+
+  rtx xmode_reg = gen_reg_rtx (Xmode);
+  riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+
+  return xmode_reg;
+}
+
 /* Implements the unsigned saturation add standard name usadd for int mode.
 
z = SAT_ADD(x, y).
=>
1. sum = x + y.
-   2. sum = truncate (sum) for QI and HI only.
+   2. sum = truncate (sum) for non-Xmode.
3. lt = sum < x.
4. lt = -lt.
5. z = sum | lt.  */
@@ -11817,22 +11834,15 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   machine_mode mode = GET_MODE (dest);
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
-  rtx xmode_x = gen_lowpart (Xmode, x)

[PATCH v3] RISC-V: Make sure high bits of usadd operands is clean for HI/QI [PR116278]

2024-08-12 Thread pan2 . li
From: Pan Li 

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
For example, consider the code below.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short is expanded to
sign_extend RTL as above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu compares 65505 with 0xffffffffffffffd8 here, but we
actually want to compare 65505 with 65496 (0xffd8).  Thus we need to
clear the high bits of the operand to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands
and sum.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: New test.
* gcc.target/riscv/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 19 +-
 .../gcc.target/riscv/pr116278-run-1.c | 20 +++
 .../gcc.target/riscv/pr116278-run-2.c | 20 +++
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a1b09e865ea..9793166dc5b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11801,6 +11801,23 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
+   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
+   the new zero extended Xmode rtx will be returned.
+   Or the gen_lowpart rtx of Xmode will be returned.  */
+
+static rtx
+riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
+{
+  if (mode != HImode && mode != QImode)
+return gen_lowpart (Xmode, x);
+
+  rtx xmode_reg = gen_reg_rtx (Xmode);
+  riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+
+  return xmode_reg;
+}
+
 /* Implements the unsigned saturation add standard name usadd for int mode.
 
z = SAT_ADD(x, y).
@@ -11817,7 +11834,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   machine_mode mode = GET_MODE (dest);
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
-  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
   rtx xmode_y = gen_lowpart (Xmode, y);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
new file mode 100644
index 000..d3812bdcdfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+
+#include 
+
+int8_t b[1];
+int8_t *d = b;
+int32_t c;
+
+int main() {
+  b[0] = -40;
+  uint16_t t = (uint16_t)d[0];
+
+  c = (t < 0xFFF6 ? t : 0xFFF6) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
new file mode 100644
index 000..669cd4f003f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+
+#include 
+
+int16_t b[1];
+int16_t *d = b;
+int64_t c;
+
+int main() {
+  b[0] = -40;
+  uint32_t t = (uint32_t)d[0];
+
+  c = (t < 0xFFF6u ? t : 0xFFF6u) + 9;
+
+  if (c != 4294967265)
+__builtin_abort ();
+}
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
-- 
2.43.0



[PATCH v2] RISC-V: Make sure high bits of usadd operands is clean for HI/QI [PR116278]

2024-08-11 Thread pan2 . li
From: Pan Li 

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
For example, consider the code below.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short is expanded to
sign_extend RTL as above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu compares 65505 with 0xffffffffffffffd8 here, but we
actually want to compare 65505 with 65496 (0xffd8).  Thus we need to
clear the high bits of the operand to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands
and sum.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 19 ++-
 .../gcc.target/riscv/pr116278-run-1.c | 16 
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5fe4273beb7..cfdb3d82972 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11564,6 +11564,23 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
+   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
+   the new zero extended Xmode rtx will be returned.
+   Or the gen_lowpart rtx of Xmode will be returned.  */
+
+static rtx
+riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
+{
+  if (mode != HImode && mode != QImode)
+return gen_lowpart (Xmode, x);
+
+  rtx xmode_reg = gen_reg_rtx (Xmode);
+  riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+
+  return xmode_reg;
+}
+
 /* Implements the unsigned saturation add standard name usadd for int mode.
 
z = SAT_ADD(x, y).
@@ -11580,7 +11597,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   machine_mode mode = GET_MODE (dest);
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
-  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
   rtx xmode_y = gen_lowpart (Xmode, y);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
new file mode 100644
index 000..f6268e290ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+signed char b[1];
+int c;
+signed char *d = b;
+
+int main() {
+  b[0] = -40;
+  c = ({
+(unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6;
+  }) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
-- 
2.43.0



[PATCH v1] RISC-V: Bugfix incorrect operand for vwsll auto-vect

2024-08-10 Thread pan2 . li
From: Pan Li 

This patch fixes an ICE for vwsll when compiling with rv64gcv_zvbb.
Consider the example below.

void vwsll_vv_test (short *restrict dst, char *restrict a,
int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
dst[i] = a[i] << b[i];
}

It will hit the vwsll pattern with following operands.
operand 0 -> (reg:RVVMF2HI 146 [ vect__7.13 ])
operand 1 -> (reg:RVVMF4QI 165 [ vect_cst__33 ])
operand 2 -> (reg:RVVM1SI 171 [ vect_cst__36 ])

According to the ISA, operand 2 should have the same mode as operand 1,
i.e. operand 2 should have RVVMF4QI mode in the example above.  Thus, add
a quad truncation for operand 2 before emitting vwsll.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

PR target/116280

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Add quad truncation to
align the mode requirement for vwsll.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116280-1.c: New test.
* gcc.target/riscv/rvv/base/pr116280-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec-opt.md|  4 
 .../gcc.target/riscv/rvv/base/pr116280-1.c | 14 ++
 .../gcc.target/riscv/rvv/base/pr116280-2.c | 10 ++
 3 files changed, 28 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d7a3cfd4602..4b33a145c17 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1546,6 +1546,10 @@ (define_insn_and_split "*vwsll_zext1_trunc_"
   "&& 1"
   [(const_int 0)]
   {
+rtx truncated = gen_reg_rtx (mode);
+emit_insn (gen_trunc2 (truncated, operands[2]));
+operands[2] = truncated;
+
 insn_code icode = code_for_pred_vwsll (mode);
 riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
 DONE;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c
new file mode 100644
index 000..8b8547e2c34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c
@@ -0,0 +1,14 @@
+/* Test there is no ICE when compile.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+short a;
+char b;
+
+void
+test (int e[][1][1], char f[][1][1][1][1]) {
+  for (int g; b;)
+for (;;)
+  for (int h; h < 4073709551572ULL; h += 18446744073709551612U)
+a = f[2][2][1][4073709551612][1] << e[1][1][g];
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c
new file mode 100644
index 000..02f2de66eff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c
@@ -0,0 +1,10 @@
+/* Test there is no ICE when compile.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+void
+test (short *restrict dst, char *restrict a, int *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+dst[i] = a[i] << b[i];
+}
-- 
2.43.0



[PATCH v1] RISC-V: Make sure high bits of usadd operands is clean for HI/QI [PR116278]

2024-08-08 Thread pan2 . li
From: Pan Li 

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
For example, consider the code below.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short is expanded to
sign_extend RTL as above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu compares 65505 with 0xffffffffffffffd8 here, but we
actually want to compare 65505 with 65496 (0xffd8).  Thus we need to
clear the high bits of the operand to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_cleanup_rtx_high): Add new func
impl to cleanup high bits of rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands
and sum.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-1.c: Adjust asm check.
* gcc.target/riscv/sat_u_add-10.c: Ditto.
* gcc.target/riscv/sat_u_add-13.c: Ditto.
* gcc.target/riscv/sat_u_add-14.c: Ditto.
* gcc.target/riscv/sat_u_add-17.c: Ditto.
* gcc.target/riscv/sat_u_add-18.c: Ditto.
* gcc.target/riscv/sat_u_add-2.c: Ditto.
* gcc.target/riscv/sat_u_add-21.c: Ditto.
* gcc.target/riscv/sat_u_add-22.c: Ditto.
* gcc.target/riscv/sat_u_add-5.c: Ditto.
* gcc.target/riscv/sat_u_add-6.c: Ditto.
* gcc.target/riscv/sat_u_add-9.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-1.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-10.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-13.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-14.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-2.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-5.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-6.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-9.c: Ditto.
* gcc.target/riscv/pr116278-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 30 ++-
 .../gcc.target/riscv/pr116278-run-1.c | 16 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c  |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-10.c |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-17.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-18.c |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-2.c  |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-21.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-22.c |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-5.c  |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-6.c  |  2 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-9.c  |  1 +
 .../gcc.target/riscv/sat_u_add_imm-1.c|  1 +
 .../gcc.target/riscv/sat_u_add_imm-10.c   |  2 ++
 .../gcc.target/riscv/sat_u_add_imm-13.c   |  1 +
 .../gcc.target/riscv/sat_u_add_imm-14.c   |  2 ++
 .../gcc.target/riscv/sat_u_add_imm-2.c|  2 ++
 .../gcc.target/riscv/sat_u_add_imm-5.c|  1 +
 .../gcc.target/riscv/sat_u_add_imm-6.c|  2 ++
 .../gcc.target/riscv/sat_u_add_imm-9.c|  1 +
 22 files changed, 68 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5fe4273beb7..fb916217e5e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11564,6 +11564,24 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Cleanup the high bits of the RTX x and reserve the low bits.
+   The reserved bitsize comes from the bitsize of reserved_mode.  */
+
+static void
+riscv_cleanup_rtx_high (rtx x, machine_mode reserved_mode)
+{
+  machine_mode mode = GET_MODE (x);
+  int reserved_bitsize = GET_MODE_BITSIZE (reserved_mode).to_constant ();
+  int mode_bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+
+  gcc_assert (mode_bitsize >= reserved_bitsize);
+
+  int shift_bitsize = mode_bitsize - reserved_bitsize;
+
+  riscv_emit_binary (ASHIFT, x, x, GEN_INT (shift_bitsize));
+  riscv_emit

[PATCH v2] Match: Support form 1 for scalar signed integer .SAT_ADD

2024-08-07 Thread pan2 . li
From: Pan Li 

This patch adds support for form 1 of the scalar signed integer
.SAT_ADD, as in the example below:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = x + y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX)

We can see the difference before and after this patch, as shown below,
if the backend implements the ssadd<m>3 pattern.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t sum;
   8   │   long int _1;
   9   │   long int _2;
  10   │   int64_t _3;
  11   │   _Bool _8;
  12   │   long int _9;
  13   │   long int _10;
  14   │   long int _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │
  18   │   <bb 2> [local count: 1073741824]:
  19   │   sum_6 = x_4(D) + y_5(D);
  20   │   _1 = x_4(D) ^ y_5(D);
  21   │   _2 = x_4(D) ^ sum_6;
  22   │   _12 = ~_1;
  23   │   _13 = _2 & _12;
  24   │   if (_13 < 0)
  25   │     goto <bb 3>; [41.00%]
  26   │   else
  27   │     goto <bb 4>; [59.00%]
  28   │
  29   │   <bb 3> [local count: 259738147]:
  30   │   _8 = x_4(D) < 0;
  31   │   _9 = (long int) _8;
  32   │   _10 = -_9;
  33   │   _11 = _10 ^ 9223372036854775807;
  34   │
  35   │   <bb 4> [local count: 1073741824]:
  36   │   # _3 = PHI <sum_6(2), _11(3)>
  38   │
  39   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the above renamed
func from caller.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 17 
 gcc/tree-ssa-math-opts.cc | 42 ++-
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..8b8a5dbcfe3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
+/* Signed saturation add, case 1:
+   T sum = X + Y;
+   SAT_S_ADD = (X ^ Y) < 0
+ ? sum
+ : (sum ^ x) >= 0
+   ? sum
+   : x < 0 ? MIN : MAX;  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0)
+(convert? @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..f39c88741a4 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, 
tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, 
gassign *stmt)
 }
 
 /*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
  *:
  *   _1 = x_3(D) + y_4(D);
  *   if (_1 >= x_3(D))
@@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator 
*gsi, gassign *stmt)
  *   # _2 = PHI <255(2), _1(3)>
  *   =>
  *[local count: 1073741824]:
- *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
+ *   _2 = .SAT_ADD (x_4(D), y_5(D));
+ 

[PATCH v2] Vect: Make sure the lhs type of .SAT_TRUNC has its mode precision [PR116202]

2024-08-06 Thread pan2 . li
From: Pan Li 

The .SAT_TRUNC vect pattern recog is valid when the lhs type has
its mode precision.  For example as below, QImode with 1 bit precision
like _Bool is invalid here.

g_12 = (long unsigned int) _2;
_13 = MIN_EXPR <g_12, 1>;
_3 = (_Bool) _13;

The above pattern cannot be recog as .SAT_TRUNC (g_12) because the dest
only has 1 bit precision with QImode mode.  Aka the type doesn't have
the mode precision.

The below tests are passed for this patch.
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

PR target/116202

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_sat_trunc_pattern): Add the
type_has_mode_precision_p check for the lhs type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116202-run-1.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/base/pr116202-run-1.c   | 24 +++
 gcc/tree-vect-patterns.cc |  5 ++--
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
new file mode 100644
index 000..d150f20b5d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -fdump-rtl-expand-details" } */
+
+int b[24];
+_Bool c[24];
+
+int main() {
+  for (int f = 0; f < 4; ++f)
+b[f] = 6;
+
+  for (int f = 0; f < 24; f += 4)
+c[f] = ({
+  int g = ({
+unsigned long g = -b[f];
+1 < g ? 1 : g;
+  });
+  g;
+});
+
+  if (c[0] != 1)
+__builtin_abort ();
+}
+
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 4674a16d15f..74f80587b0e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4695,11 +4695,12 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
 
   tree ops[1];
   tree lhs = gimple_assign_lhs (last_stmt);
+  tree otype = TREE_TYPE (lhs);
 
-  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL))
+  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
+  && type_has_mode_precision_p (otype))
 {
   tree itype = TREE_TYPE (ops[0]);
-  tree otype = TREE_TYPE (lhs);
   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
   internal_fn fn = IFN_SAT_TRUNC;
-- 
2.43.0



[PATCH v1] RISC-V: Update .SAT_TRUNC dump check due to middle-end change

2024-08-05 Thread pan2 . li
From: Pan Li 

Due to recent middle-end change, update the .SAT_TRUNC expand dump
check from 2 to 4.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Adjust
asm check times from 2 to 4.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
index 7f047f3f6a2..ae3e44cd57e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
@@ -16,4 +16,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
-- 
2.43.0



[PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

2024-08-05 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 1 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T) \
  T __attribute__((noinline))\
  sat_s_add_##T##_fmt_1 (T x, T y)   \
  {  \
T min = (T)1u << (sizeof (T) * 8 - 1);   \
T max = min - 1; \
return (x ^ y) < 0   \
  ? (T)(x + y)   \
  : ((T)(x + y) ^ x) >= 0\
? (T)(x + y) \
: x < 0 ? min : max; \
  }

DEF_SAT_S_ADD_FMT_1 (int64_t)

We can tell the difference before and after this patch if the backend
implements the ssadd<m>3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   long int _1;
   8   │   long int _2;
   9   │   long int _3;
  10   │   int64_t _4;
  11   │   long int _7;
  12   │   _Bool _9;
  13   │   long int _10;
  14   │   long int _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │
  18   │ ;;   basic block 2, loop depth 0
  19   │ ;;pred:   ENTRY
  20   │   _1 = x_5(D) ^ y_6(D);
  21   │   _13 = x_5(D) + y_6(D);
  22   │   _3 = x_5(D) ^ _13;
  23   │   _2 = ~_1;
  24   │   _7 = _2 & _3;
  25   │   if (_7 >= 0)
  26   │     goto <bb 4>; [59.00%]
  27   │   else
  28   │     goto <bb 3>; [41.00%]
  29   │ ;;succ:   4
  30   │ ;;3
  31   │
  32   │ ;;   basic block 3, loop depth 0
  33   │ ;;pred:   2
  34   │   _9 = x_5(D) < 0;
  35   │   _10 = (long int) _9;
  36   │   _11 = -_10;
  37   │   _12 = _11 ^ 9223372036854775807;
  38   │ ;;succ:   4
  39   │
  40   │ ;;   basic block 4, loop depth 0
  41   │ ;;pred:   2
  42   │ ;;3
  43   │   # _4 = PHI <_13(2), _12(3)>
  44   │   return _4;
  45   │ ;;succ:   EXIT
  46   │
  47   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the above renamed
func from caller.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 14 +
 gcc/tree-ssa-math-opts.cc | 42 ++-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..0a2ffc733d3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
+/* Signed saturation add, case 1:
+   T min = (T)1u << (sizeof (T) * 8 - 1);
+   T max = min - 1;
+   SAT_S_ADD = (X ^ Y) < 0
+ ? (X + Y)
+ : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
+(match (signed_integer_sat_add @0 @1)
+  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))
+   integer_zerop)
+   (convert? (plus@2 (convert1? @0) (convert1? @1)))
+   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..d5c9b475f72 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, 
tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, 
gassign *stmt)
 }
 
 /*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
  *:
  *   _1 = x_3(D) + y_4(D);
  *   if (_1 >= x_3(D))

[PATCH v2] RISC-V: Support IMM for operand 0 of ussub pattern

2024-08-04 Thread pan2 . li
From: Pan Li 

This patch would like to allow IMM for the operand 0 of ussub pattern.
Aka .SAT_SUB(1023, y) as the below example.

Form 1:
  #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
  {   \
return (T)IMM >= y ? (T)IMM - y : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023)

Before this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ bgtua0,a5,.L3
  13   │ sub a0,a5,a0
  14   │ ret
  15   │ .L3:
  16   │ li  a0,0
  17   │ ret

After this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ sltua4,a5,a0
  13   │ addia4,a4,-1
  14   │ sub a0,a5,a0
  15   │ and a0,a4,a0
  16   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new
func impl to gen xmode rtx reg from operand rtx.
(riscv_expand_ussub): Gen xmode reg for operand 1.
* config/riscv/riscv.md: Allow const_int for operand 1.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_u_sub_imm-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 51 -
 gcc/config/riscv/riscv.md |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 
 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_1.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_2.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 
 17 files changed, 482 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index b19d56149e7..5e4e9722729 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11612,6 +11612,55 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG,  the gen_lowpart of Xmode will be returned.
+
+   If rtx is const_int,  a new REG rtx will be created to hold the value of
+   const_int and then returned.
+
+   According to the gccint doc, the constants generated for modes with fewer
+   bits than in HOST_WIDE_IN

[PATCH v1] Match: Add type_has_mode_precision_p check for SAT_TRUNC [PR116202]

2024-08-04 Thread pan2 . li
From: Pan Li 

The .SAT_TRUNC matching is only valid when the type has its mode
precision.

g_12 = (long unsigned int) _2;
_13 = MIN_EXPR <g_12, 1>;
_3 = (_Bool) _13;

The above pattern cannot be recognized as .SAT_TRUNC (g_12) because the
dest only has 1-bit precision but QImode.  I.e. the type doesn't have its
mode precision.  Thus, add type_has_mode_precision_p for the dest to
avoid such a case.

The below tests are passed for this patch.
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

PR target/116202

gcc/ChangeLog:

* match.pd: Add type_has_mode_precision_p for the dest type
of the .SAT_TRUNC matching.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116202-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  |  6 +++--
 .../riscv/rvv/base/pr116202-run-1.c   | 24 +++
 2 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..dfa0bba3908 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3283,7 +3283,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
wide_int int_cst = wi::to_wide (@1, itype_precision);
   }
-  (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
+  (if (type_has_mode_precision_p (type) && otype_precision < itype_precision
+   && wi::eq_p (trunc_max, int_cst))
 
 /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
@@ -3309,7 +3310,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
wide_int int_cst = wi::to_wide (@1, itype_precision);
   }
-  (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
+  (if (type_has_mode_precision_p (type) && otype_precision < itype_precision
+   && wi::eq_p (trunc_max, int_cst))
 
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
new file mode 100644
index 000..d150f20b5d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -fdump-rtl-expand-details" } */
+
+int b[24];
+_Bool c[24];
+
+int main() {
+  for (int f = 0; f < 4; ++f)
+b[f] = 6;
+
+  for (int f = 0; f < 24; f += 4)
+c[f] = ({
+  int g = ({
+unsigned long g = -b[f];
+1 < g ? 1 : g;
+  });
+  g;
+});
+
+  if (c[0] != 1)
+__builtin_abort ();
+}
+
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
-- 
2.43.0



[PATCH v1] RISC-V: Support IMM for operand 0 of ussub pattern

2024-08-03 Thread pan2 . li
From: Pan Li 

This patch would like to allow IMM for the operand 0 of ussub pattern.
Aka .SAT_SUB(1023, y) as the below example.

Form 1:
  #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
  {   \
return (T)IMM >= y ? (T)IMM - y : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023)

Before this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ bgtua0,a5,.L3
  13   │ sub a0,a5,a0
  14   │ ret
  15   │ .L3:
  16   │ li  a0,0
  17   │ ret

After this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ sltua4,a5,a0
  13   │ addia4,a4,-1
  14   │ sub a0,a5,a0
  15   │ and a0,a4,a0
  16   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new
func impl to gen xmode rtx reg.
(riscv_expand_ussub): Gen xmode reg for operand 1.
* config/riscv/riscv.md: Allow const_int for operand 1.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_u_sub_imm-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 45 ++-
 gcc/config/riscv/riscv.md |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 
 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_1.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-1_2.c  | 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-2_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_1.c  | 21 +++
 .../gcc.target/riscv/sat_u_sub_imm-3_2.c  | 22 
 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 
 17 files changed, 476 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index b19d56149e7..90d95944ba4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11612,6 +11612,49 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* According to the gccint doc, the Constants generated for modes with fewer
+   bits than in HOST_WIDE_INT must be sign extended to full width.  Thus we
+   may have some problem here when expanding unsigned pattern like ussub.
+
+   There are 2 cases here.  Take .SAT_SUB (imm, y) as example.
+
+   1. Case 1:  .SAT_SUB (127, y) for QImode.
+  The imm will be (const_int 127) after expand_expr_real_1,  thus we
+  can just move the (const_int 127) to Xmode reg without any other insn.
+
+   2. Case 

[PATCH v1] RISC-V: Take Xmode instead of Pmode for ussub expanding

2024-07-29 Thread pan2 . li
From: Pan Li 

Pmode is designed for pointers, thus leverage Xmode instead
for the expansion of ussub.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_ussub): Promote to Xmode
instead of Pmode.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a490b9598b0..8ece7859945 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11620,26 +11620,26 @@ void
 riscv_expand_ussub (rtx dest, rtx x, rtx y)
 {
   machine_mode mode = GET_MODE (dest);
-  rtx pmode_x = gen_lowpart (Pmode, x);
-  rtx pmode_y = gen_lowpart (Pmode, y);
-  rtx pmode_lt = gen_reg_rtx (Pmode);
-  rtx pmode_minus = gen_reg_rtx (Pmode);
-  rtx pmode_dest = gen_reg_rtx (Pmode);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_minus = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
 
   /* Step-1: minus = x - y  */
-  riscv_emit_binary (MINUS, pmode_minus, pmode_x, pmode_y);
+  riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
 
   /* Step-2: lt = x < y  */
-  riscv_emit_binary (LTU, pmode_lt, pmode_x, pmode_y);
+  riscv_emit_binary (LTU, xmode_lt, xmode_x, xmode_y);
 
   /* Step-3: lt = lt - 1 (lt + (-1))  */
-  riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));
+  riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));
 
-  /* Step-4: pmode_dest = minus & lt  */
-  riscv_emit_binary (AND, pmode_dest, pmode_lt, pmode_minus);
+  /* Step-4: xmode_dest = minus & lt  */
+  riscv_emit_binary (AND, xmode_dest, xmode_lt, xmode_minus);
 
-  /* Step-5: dest = pmode_dest  */
-  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
+  /* Step-5: dest = xmode_dest  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
 /* Implement the unsigned saturation truncation for int mode.
-- 
2.34.1



[PATCH v2] Internal-fn: Handle vector bool type for type strict match mode [PR116103]

2024-07-29 Thread pan2 . li
From: Pan Li 

For some targets like target=amdgcn-amdhsa, we need to take care of
vector bool types prior to general vector mode types.  Otherwise we may
have asm check failures as below.

gcc.target/gcn/cond_smax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 80
gcc.target/gcn/cond_smin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 80
gcc.target/gcn/cond_umax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 56
gcc.target/gcn/cond_umin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 56
gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump-not ivopts "zero if "

The below test suites are passed for this patch.
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.
4. The amdgcn test case as above.

gcc/ChangeLog:

* internal-fn.cc (type_strictly_matches_mode_p): Add handling
for vector bool type.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 8a2e07f2f96..966594a52ed 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4171,6 +4171,16 @@ direct_internal_fn_optab (internal_fn fn)
 static bool
 type_strictly_matches_mode_p (const_tree type)
 {
+  /* The masked vector operations have both vector data operands and vector
+ boolean operands.  The vector data operands are expected to have a vector
+ mode,  but the vector boolean operands can be an integer mode rather than
+ a vector mode,  depending on how TARGET_VECTORIZE_GET_MASK_MODE is
+ defined.  PR116103.  */
+  if (VECTOR_BOOLEAN_TYPE_P (type)
+  && SCALAR_INT_MODE_P (TYPE_MODE (type))
+  && TYPE_PRECISION (TREE_TYPE (type)) == 1)
+return true;
+
   if (VECTOR_TYPE_P (type))
 return VECTOR_MODE_P (TYPE_MODE (type));
 
-- 
2.34.1



[PATCH v1] Internal-fn: Handle vector bool type for type strict match mode [PR116103]

2024-07-29 Thread pan2 . li
From: Pan Li 

For some targets like target=amdgcn-amdhsa, we need to take care of
vector bool types prior to general vector mode types.  Otherwise we may
have asm check failures as below.

gcc.target/gcn/cond_smax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 80
gcc.target/gcn/cond_smin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 80
gcc.target/gcn/cond_umax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 56
gcc.target/gcn/cond_umin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, 
s[0-9]+, v[0-9]+ 56
gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump-not ivopts "zero if "

The below test suites are passed for this patch.
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.
4. The amdgcn test case as above.

gcc/ChangeLog:

* internal-fn.cc (type_strictly_matches_mode_p): Add handling
for vector bool type.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 8a2e07f2f96..086c8be398a 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4171,6 +4171,12 @@ direct_internal_fn_optab (internal_fn fn)
 static bool
 type_strictly_matches_mode_p (const_tree type)
 {
+  /* For target=amdgcn-amdhsa,  we need to take care of vector bool types.
+ More details see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116103.  */
+  if (VECTOR_BOOLEAN_TYPE_P (type) && SCALAR_INT_MODE_P (TYPE_MODE (type))
+&& TYPE_PRECISION (TREE_TYPE (type)) == 1)
+return true;
+
   if (VECTOR_TYPE_P (type))
 return VECTOR_MODE_P (TYPE_MODE (type));
 
-- 
2.34.1



[PATCH v1] Widening-Mul: Try .SAT_SUB for PLUS_EXPR when one op is IMM

2024-07-27 Thread pan2 . li
From: Pan Li 

After adding the matching for .SAT_SUB when one op is IMM, there
will be a new root PLUS_EXPR for the .SAT_SUB pattern.  For example,

Form 3:
  #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_3 (T x)  \
  {   \
return x >= IMM ? x - IMM : 0;\
  }

DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 11)

And then we will have gimple before widening-mul as below.  Thus,  try
the .SAT_SUB for the PLUS_EXPR.

   4   │ __attribute__((noinline))
   5   │ uint64_t sat_u_sub_imm11_uint64_t_fmt_3 (uint64_t x)
   6   │ {
   7   │   long unsigned int _1;
   8   │   uint64_t _3;
   9   │
  10   │   <bb 2> [local count: 1073741824]:
  11   │   _1 = MAX_EXPR <x_2(D), 11>;
  12   │   _3 = _1 + 18446744073709551605;
  13   │   return _3;
  14   │
  15   │ }

The below test suites are passed for this patch.
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
Try .SAT_SUB for PLUS_EXPR case.

Signed-off-by: Pan Li 
---
 gcc/tree-ssa-math-opts.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index ac86be8eb94..8d96a4c964b 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6129,6 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 
case PLUS_EXPR:
  match_unsigned_saturation_add (&gsi, as_a (stmt));
+ match_unsigned_saturation_sub (&gsi, as_a (stmt));
  /* fall-through  */
case MINUS_EXPR:
  if (!convert_plusminus_to_widen (&gsi, stmt, code))
-- 
2.34.1



[PATCH v1] Match: Support .SAT_SUB with IMM op for form 1-4

2024-07-26 Thread pan2 . li
From: Pan Li 

This patch would like to support .SAT_SUB when one of the ops
is an IMM, i.e. forms 1-4 below.

Form 1:
 #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
 T __attribute__((noinline)) \
 sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
 {   \
   return IMM >= y ? IMM - y : 0;\
 }

Form 2:
  #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_2 (T y)  \
  {   \
return IMM > y ? IMM - y : 0; \
  }

Form 3:
  #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_3 (T x)  \
  {   \
return x >= IMM ? x - IMM : 0;\
  }

Form 4:
  #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
  {   \
return x > IMM ? x - IMM : 0; \
  }

Take below form 1 as example:

DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 11)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y)
   6   │ {
   7   │   uint64_t _1;
   8   │   uint64_t _3;
   9   │
  10   │ ;;   basic block 2, loop depth 0
  11   │ ;;pred:   ENTRY
  12   │   if (y_2(D) <= 11)
  13   │     goto <bb 3>; [50.00%]
  14   │   else
  15   │     goto <bb 4>; [50.00%]
  16   │ ;;succ:   3
  17   │ ;;4
  18   │
  19   │ ;;   basic block 3, loop depth 0
  20   │ ;;pred:   2
  21   │   _3 = 11 - y_2(D);
  22   │ ;;succ:   4
  23   │
  24   │ ;;   basic block 4, loop depth 0
  25   │ ;;pred:   2
  26   │ ;;3
  27   │   # _1 = PHI <0(2), _3(3)>
  28   │   return _1;
  29   │ ;;succ:   EXIT
  30   │
  31   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y)
   6   │ {
   7   │   uint64_t _1;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _1 = .SAT_SUB (11, y_2(D)); [tail call]
  12   │   return _1;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Add case 9 and case 10 for .SAT_SUB when one
of the op is IMM.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index cf359b0ec0f..b2e7d61790d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3234,6 +3234,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Unsigned saturation sub with op_0 imm, case 9 (branch with gt):
+   SAT_U_SUB = IMM > Y  ? (IMM - Y) : 0.
+ = IMM >= Y ? (IMM - Y) : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @1))
+ (with
+  {
+   unsigned precision = TYPE_PRECISION (type);
+   wide_int max = wi::mask (precision, false, precision);
+   wide_int c0 = wi::to_wide (@0);
+   wide_int c2 = wi::to_wide (@2);
+   wide_int c2_add_1 = wi::add (c2, wi::uhwi (1, precision));
+   bool equal_p = wi::eq_p (c0, c2);
+   bool less_than_1_p = !wi::eq_p (c2, max) && wi::eq_p (c2_add_1, c0);
+  }
+  (if (equal_p || less_than_1_p)
+
+/* Unsigned saturation sub with op_1 imm, case 10:
+   SAT_U_SUB = X > IMM  ? (X - IMM) : 0.
+ = X >= IMM ? (X - IMM) : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (plus (max @0 INTEGER_CST@1) INTEGER_CST@2)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @1))
+ (with
+  {
+   unsigned precision = TYPE_PRECISION (type);
+   wide_int c1 = wi::to_wide (@1);
+   wide_int c2 = wi::to_wide (@2);
+   wide_int sum = wi::add (c1, c2);
+  }
+  (if (wi::eq_p (sum, wi::uhwi (0, precision)))
+
 /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)
-- 
2.34.1



[PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

2024-07-22 Thread pan2 . li
From: Pan Li 

This patch implements the quad and oct .SAT_TRUNC patterns
in the RISC-V backend.  For example:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   _Bool overflow;
   8   │   short unsigned int _1;
   9   │   short unsigned int _2;
  10   │   short unsigned int _3;
  11   │   uint16_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;pred:   ENTRY
  15   │   overflow_5 = x_4(D) > 65535;
  16   │   _1 = (short unsigned int) x_4(D);
  17   │   _2 = (short unsigned int) overflow_5;
  18   │   _3 = -_2;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;succ:   EXIT
  22   │
  23   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   uint16_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The following test suites pass with this patch:
1. The rv64gcv full regression tests.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
quad truncation.
(ANYI_OCT_TRUNC): New iterator for oct truncation.
(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
(anyi_quad_truncated): Ditto but for lower case.
(anyi_oct_truncated): Ditto but for lower case.
* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
Add new pattern for quad truncation.
(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
the expand dump check times.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-4.c: New test.
* gcc.target/riscv/sat_u_trunc-5.c: New test.
* gcc.target/riscv/sat_u_trunc-6.c: New test.
* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
* gcc.target/riscv/sat_u_trunc-run-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/iterators.md | 20 
 gcc/config/riscv/riscv.md | 20 
 .../rvv/autovec/unop/vec_sat_u_trunc-2.c  |  2 +-
 .../rvv/autovec/unop/vec_sat_u_trunc-3.c  |  2 +-
 .../gcc.target/riscv/sat_arith_data.h | 51 +++
 .../gcc.target/riscv/sat_u_trunc-4.c  | 17 +++
 .../gcc.target/riscv/sat_u_trunc-5.c  | 17 +++
 .../gcc.target/riscv/sat_u_trunc-6.c  | 20 
 .../gcc.target/riscv/sat_u_trunc-run-4.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-5.c  | 16 ++
 .../gcc.target/riscv/sat_u_trunc-run-6.c  | 16 ++
 11 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
 (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
 (define_mode_attr ANYI_DOUBLE_TRUNCATED [
   (HI "QI") (SI "HI") (DI "SI")
 ])
 
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+  (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+  (DI "QI")
+])
+
 (define_mode_attr anyi_double_truncated [
   (HI "qi") (SI "hi") (DI "si")
 ])
 
+(define_mode_attr anyi_quad_truncated [
+  (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+  (DI "qi")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.m

[PATCH v1] RISC-V: Rearrange the test helper files for vector .SAT_*

2024-07-21 Thread pan2 . li
From: Pan Li 

Rearrange the test helper header files and align their naming
conventions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: Move to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvv_run.h: ...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: Move to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvx_run.h: ...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: Move to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx_run.h: ...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust
the include file names.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: Ditto.
* gcc.target/riscv/rvv/au

[PATCH v2] Internal-fn: Only allow type matches mode for internal fn[PR115961]

2024-07-19 Thread pan2 . li
From: Pan Li 

direct_internal_fn_supported_p places no restrictions on the type
modes.  For example, the bit-field assignment below is recognized as
.SAT_TRUNC.

struct e
{
  unsigned pre : 12;
  unsigned a : 4;
};

__attribute__((noipa))
void bug (e * v, unsigned def, unsigned use) {
  e & defE = *v;
  defE.a = min_u (use + 1, 0xf);
}

This patch makes direct_internal_fn_supported_p stricter: an internal
function is only reported as supported when both types of the tree pair
strictly match their modes.

The following test suites pass with this patch:
1. The rv64gcv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

PR target/115961

gcc/ChangeLog:

* internal-fn.cc (type_strictly_matches_mode_p): New function to
check whether a type strictly matches its mode.
(type_pair_strictly_matches_mode_p): Ditto but for a tree type
pair.
(direct_internal_fn_supported_p): Add the above check for the tree
type pair.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr115961-run-1.C: New test.
* g++.target/riscv/rvv/base/pr115961-run-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc| 32 +
 .../g++.target/i386/pr115961-run-1.C  | 34 +++
 .../riscv/rvv/base/pr115961-run-1.C   | 34 +++
 3 files changed, 100 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 95946bfd683..5c21249318e 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn)
   gcc_unreachable ();
 }
 
+/* Return true if TYPE's mode has the same format as TYPE, and if there is
+   a 1:1 correspondence between the values that the mode can store and the
+   values that the type can store.  */
+
+static bool
+type_strictly_matches_mode_p (const_tree type)
+{
+  if (VECTOR_TYPE_P (type))
+return VECTOR_MODE_P (TYPE_MODE (type));
+
+  if (INTEGRAL_TYPE_P (type))
+return type_has_mode_precision_p (type);
+
+  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
+return true;
+
+  return false;
+}
+
+/* Return true if both the first and the second type of tree pair are
+   strictly matches their modes,  or return false.  */
+
+static bool
+type_pair_strictly_matches_mode_p (tree_pair type_pair)
+{
+  return type_strictly_matches_mode_p (type_pair.first)
+&& type_strictly_matches_mode_p (type_pair.second);
+}
+
 /* Return true if FN is supported for the types in TYPES when the
optimization type is OPT_TYPE.  The types are those associated with
the "type0" and "type1" fields of FN's direct_internal_fn_info
@@ -4173,6 +4202,9 @@ bool
 direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
optimization_type opt_type)
 {
+  if (!type_pair_strictly_matches_mode_p (types))
+return false;
+
   switch (fn)
 {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C 
b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
-- 
2.34.1



[PATCH v1] Internal-fn: Only allow modes describe types for internal fn[PR115961]

2024-07-18 Thread pan2 . li
From: Pan Li 

direct_internal_fn_supported_p places no restrictions on the type
modes.  For example, the bit-field assignment below is recognized as
.SAT_TRUNC.

struct e
{
  unsigned pre : 12;
  unsigned a : 4;
};

__attribute__((noipa))
void bug (e * v, unsigned def, unsigned use) {
  e & defE = *v;
  defE.a = min_u (use + 1, 0xf);
}

This patch adds checks to direct_internal_fn_supported_p so that it
only allows tree types that are described by their modes.

The following test suites pass with this patch:
1. The rv64gcv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

PR target/115961

gcc/ChangeLog:

* internal-fn.cc (mode_describle_type_precision_p): New function to
check whether the mode describes the tree type.
(direct_internal_fn_supported_p): Add the above check for the first
and second tree types of the tree pair.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr115961-run-1.C: New test.
* g++.target/riscv/rvv/base/pr115961-run-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc| 21 
 .../g++.target/i386/pr115961-run-1.C  | 34 +++
 .../riscv/rvv/base/pr115961-run-1.C   | 34 +++
 3 files changed, 89 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 95946bfd683..4dc69264a24 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4164,6 +4164,23 @@ direct_internal_fn_optab (internal_fn fn)
   gcc_unreachable ();
 }
 
+/* Return true if the mode describes the precision of tree type,  or false.  */
+
+static bool
+mode_describle_type_precision_p (const_tree type)
+{
+  if (VECTOR_TYPE_P (type))
+return VECTOR_MODE_P (TYPE_MODE (type));
+
+  if (INTEGRAL_TYPE_P (type))
+return type_has_mode_precision_p (type);
+
+  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
+return true;
+
+  return false;
+}
+
 /* Return true if FN is supported for the types in TYPES when the
optimization type is OPT_TYPE.  The types are those associated with
the "type0" and "type1" fields of FN's direct_internal_fn_info
@@ -4173,6 +4190,10 @@ bool
 direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
optimization_type opt_type)
 {
+  if (!mode_describle_type_precision_p (types.first)
+|| !mode_describle_type_precision_p (types.second))
+return false;
+
   switch (fn)
 {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C 
b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
-- 
2.34.1



  1   2   3   4   >