[PATCH v1] Widening-Mul: Fix one ICE when iterate on phi node
From: Pan Li We iterate over all phi nodes of a bb to try to match the SAT_* pattern for scalar integers. We also remove the phi node when the relevant pattern is matched. Unfortunately, the iterator has no idea that the phi node was removed, so it continues to use the freed data and then hits an ICE similar to the one below. [0] psi ptr 0x75216340c000 [0] psi ptr 0x75216340c400 [1] psi ptr 0xa5a5a5a5a5a5a5a5 <=== GC freed pointer. during GIMPLE pass: widening_mul tmp.c: In function ‘f’: tmp.c:45:6: internal compiler error: Segmentation fault 45 | void f(int rows, int cols) { | ^ 0x36e2788 internal_error(char const*, ...) ../../gcc/diagnostic-global-context.cc:517 0x18005f0 crash_signal ../../gcc/toplev.cc:321 0x752163c4531f ??? ./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0 0x103ae0e bool is_a_helper::test(gimple*) ../../gcc/gimple.h:1256 0x103f9a5 bool is_a(gimple*) ../../gcc/is-a.h:232 0x103dc78 gphi* as_a(gimple*) ../../gcc/is-a.h:255 0x104f12e gphi_iterator::phi() const ../../gcc/gimple-iterator.h:47 0x1a57bef after_dom_children ../../gcc/tree-ssa-math-opts.cc:6140 0x3344482 dom_walker::walk(basic_block_def*) ../../gcc/domwalk.cc:354 0x1a58601 execute ../../gcc/tree-ssa-math-opts.cc:6312 This patch fixes the iterate-on-modified-collection problem by backing up the next phi in advance. The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. PR middle-end/116861 gcc/ChangeLog: * tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children): Back up the next psi iterator before removing the phi node. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116861-1.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.dg/torture/pr116861-1.c | 76 +++ gcc/tree-ssa-math-opts.cc | 9 ++- 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr116861-1.c diff --git a/gcc/testsuite/gcc.dg/torture/pr116861-1.c b/gcc/testsuite/gcc.dg/torture/pr116861-1.c new file mode 100644 index 000..7dcfe664d89 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116861-1.c @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void pm_message(void); +struct CmdlineInfo { + _Bool wantCrop[4]; + unsigned int margin; +}; +typedef struct { + unsigned int removeSize; +} CropOp; +typedef struct { + CropOp op[4]; +} CropSet; +static void divideAllBackgroundIntoBorders(unsigned int const totalSz, + _Bool const wantCropSideA, + _Bool const wantCropSideB, + unsigned int const wantMargin, + unsigned int *const sideASzP, + unsigned int *const sideBSzP) { + unsigned int sideASz, sideBSz; + if (wantCropSideA && wantCropSideB) + { +sideASz = totalSz / 2; +if (wantMargin) + sideBSz = totalSz - sideASz; + } + else if (wantCropSideB) + { +sideBSz = 0; + } + *sideASzP = sideASz; + *sideBSzP = sideBSz; +} +static CropOp oneSideCrop(_Bool const wantCrop, unsigned int const borderSz, + unsigned int const margin) { + CropOp retval; + if (wantCrop) + { +if (borderSz >= margin) + retval.removeSize = borderSz - margin; +else + retval.removeSize = 0; + } + return retval; +} +struct CmdlineInfo cmdline1; +void f(int rows, int cols) { + struct CmdlineInfo cmdline0 = cmdline1; + CropSet crop; + struct CmdlineInfo cmdline = cmdline0; + CropSet retval; + unsigned int leftBorderSz, rghtBorderSz; + unsigned int topBorderSz, botBorderSz; + divideAllBackgroundIntoBorders(cols, cmdline.wantCrop[0], + cmdline.wantCrop[1], cmdline.margin > 0, + &leftBorderSz, &rghtBorderSz); + divideAllBackgroundIntoBorders(rows, cmdline.wantCrop[2], + cmdline.wantCrop[3], cmdline.margin > 0, + &topBorderSz, &botBorderSz); + retval.op[0] = + 
oneSideCrop(cmdline.wantCrop[0], leftBorderSz, cmdline.margin); + retval.op[1] = + oneSideCrop(cmdline.wantCrop[1], rghtBorderSz, cmdline.margin); + retval.op[2] = + oneSideCrop(cmdline.wantCrop[2], topBorderSz, cmdline.margin); + retval.op[3] = + oneSideCrop(cmdline.wantCrop[3], botBorderSz, cmdline.margin); + crop = retval; + unsigned int i = 0; + for (i = 0; i < 4; ++i) + { +if (crop.op[i].removeSize == 0) + pm_message(); + } +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 8c622514dbd..f1cfe7dfab0 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -6129,10 +6129,15 @@ math_opts_dom_walker::after_dom_children (basic_block bb) fma_d
[PATCH v1 1/2] Match: Support form 2 for scalar signed integer SAT_SUB
From: Pan Li This patch adds support for form 2 of the scalar signed integer SAT_SUB, as in the example below: Form 2: #define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_sub_##T##_fmt_2 (T x, T y) \ {\ T minus = (UT)x - (UT)y; \ if ((x ^ y) >= 0 || (minus ^ x) >= 0) \ return minus;\ return x < 0 ? MIN : MAX; \ } DEF_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_sub_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t minus; 8 │ unsigned char x.0_1; 9 │ unsigned char y.1_2; 10 │ unsigned char _3; 11 │ signed char _4; 12 │ signed char _5; 13 │ int8_t _6; 14 │ _Bool _11; 15 │ signed char _12; 16 │ signed char _13; 17 │ signed char _14; 18 │ signed char _15; 19 │ 20 │ ;; basic block 2, loop depth 0 21 │ ;;pred: ENTRY 22 │ x.0_1 = (unsigned char) x_7(D); 23 │ y.1_2 = (unsigned char) y_8(D); 24 │ _3 = x.0_1 - y.1_2; 25 │ minus_9 = (int8_t) _3; 26 │ _4 = x_7(D) ^ y_8(D); 27 │ _5 = x_7(D) ^ minus_9; 28 │ _15 = _4 & _5; 29 │ if (_15 >= 0) 30 │ goto ; [42.57%] 31 │ else 32 │ goto ; [57.43%] 33 │ ;;succ: 4 34 │ ;;3 35 │ 36 │ ;; basic block 3, loop depth 0 37 │ ;;pred: 2 38 │ _11 = x_7(D) < 0; 39 │ _12 = (signed char) _11; 40 │ _13 = -_12; 41 │ _14 = _13 ^ 127; 42 │ ;;succ: 4 43 │ 44 │ ;; basic block 4, loop depth 0 45 │ ;;pred: 2 46 │ ;;3 47 │ # _6 = PHI 48 │ return _6; 49 │ ;;succ: EXIT 50 │ 51 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_sub_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_SUB (x_7(D), y_8(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add case 2 matching pattern for signed SAT_SUB. 
Signed-off-by: Pan Li --- gcc/match.pd | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 63f7f3142c4..3baf209350b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3372,6 +3372,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type +/* Signed saturation sub, case 2: + T minus = (T)((UT)X - (UT)Y); + SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus; + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_sub @0 @1) + (cond^ (ge (bit_and:c (bit_xor:c @0 @1) + (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) +(nop_convert @1) + integer_zerop) + @2 + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type + /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ (match (unsigned_integer_sat_trunc @0) -- 2.43.0
[PATCH v1 2/2] RISC-V: Add testcases for form 2 of scalar signed SAT_SUB
From: Pan Li Form 2: #define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_sub_##T##_fmt_2 (T x, T y) \ {\ T minus = (UT)x - (UT)y; \ if ((x ^ y) >= 0 || (minus ^ x) >= 0) \ return minus;\ return x < 0 ? MIN : MAX; \ } DEF_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) The tests below passed for this patch. * The rv64gcv full regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_s_sub-2-i16.c: New test. * gcc.target/riscv/sat_s_sub-2-i32.c: New test. * gcc.target/riscv/sat_s_sub-2-i64.c: New test. * gcc.target/riscv/sat_s_sub-2-i8.c: New test. * gcc.target/riscv/sat_s_sub-run-2-i16.c: New test. * gcc.target/riscv/sat_s_sub-run-2-i32.c: New test. * gcc.target/riscv/sat_s_sub-run-2-i64.c: New test. * gcc.target/riscv/sat_s_sub-run-2-i8.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 15 ++ .../gcc.target/riscv/sat_s_sub-2-i16.c| 30 +++ .../gcc.target/riscv/sat_s_sub-2-i32.c| 28 + .../gcc.target/riscv/sat_s_sub-2-i64.c| 27 + .../gcc.target/riscv/sat_s_sub-2-i8.c | 28 + .../gcc.target/riscv/sat_s_sub-run-2-i16.c| 16 ++ .../gcc.target/riscv/sat_s_sub-run-2-i32.c| 16 ++ .../gcc.target/riscv/sat_s_sub-run-2-i64.c| 16 ++ .../gcc.target/riscv/sat_s_sub-run-2-i8.c | 16 ++ 9 files changed, 192 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i32.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i64.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i32.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i64.c create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_sub-run-2-i8.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 587f3f8348c..66d393399a2 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -367,9 +367,24 @@ sat_s_sub_##T##_fmt_1 (T x, T y) \ #define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \ DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) +#define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \ +T __attribute__((noinline)) \ +sat_s_sub_##T##_fmt_2 (T x, T y) \ +{\ + T minus = (UT)x - (UT)y; \ + if ((x ^ y) >= 0 || (minus ^ x) >= 0) \ +return minus;\ + return x < 0 ? MIN : MAX; \ +} +#define DEF_SAT_S_SUB_FMT_2_WRAP(T, UT, MIN, MAX) \ + DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) + #define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y) #define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y) +#define RUN_SAT_S_SUB_FMT_2(T, x, y) sat_s_sub_##T##_fmt_2(x, y) +#define RUN_SAT_S_SUB_FMT_2_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_2(T, x, y) + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c new file mode 100644 index 000..6aac2c71ba4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_s_sub_int16_t_fmt_2: +** sub\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15 +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** li\s+[atx][0-9]+,\s*32768 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** 
xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** or\s+a0,\s*[atx][0-9]+,\
[PATCH v1] RISC-V: Cleanup debug code for SAT_* testcases [NFC]
From: Pan Li Some print code for debugging was committed by mistake; remove it from the test header file. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/scalar_sat_binary_run_xxx.h: Remove printf code for debugging. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h | 5 - 1 file changed, 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h index 7578453e944..a7e0d988626 100644 --- a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h +++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h @@ -1,8 +1,6 @@ #ifndef HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX #define HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX -#include <stdio.h> - int main () { @@ -14,10 +12,7 @@ main () d = DATA[i]; if (RUN_BINARY (d.a, d.b) != d.expect) - { - printf ("%d + %d = %d, but %d\n", d.a, d.b, d.expect, RUN_BINARY (d.a, d.b)); __builtin_abort (); - } } return 0; -- 2.43.0
[PATCH v1 2/3] RISC-V: Refine the testcase of vector SAT_SUB
From: Pan Li Use scan-assembler-times for the vssub insn check instead of the function body check, as we only care about whether we can generate the fixed point insn vssub. The tests below passed for this patch. * The rv64gcv full regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: Ditto. 
Signed-off-by: Pan Li --- .../riscv/rvv/autovec/binop/vec_sat_u_sub-1.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-10.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-11.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-12.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-13.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-14.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-15.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-16.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-17.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-18.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-19.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-2.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-20.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-21.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-22.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-23.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-24.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-25.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-26.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-27.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_
[PATCH v1 3/3] RISC-V: Refine the testcase of vector SAT_TRUNC
From: Pan Li Use scan-assembler-times for the vnclip insn check instead of the function body check, as we only care about whether we can generate the fixed point insn vnclip. The tests below passed for this patch. * The rv64gcv full regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: Ditto. 
Signed-off-by: Pan Li --- .../rvv/autovec/unop/vec_sat_u_trunc-1.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-10.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-11.c | 16 +- .../rvv/autovec/unop/vec_sat_u_trunc-12.c | 12 +-- .../rvv/autovec/unop/vec_sat_u_trunc-13.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-14.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-15.c | 21 ++- .../rvv/autovec/unop/vec_sat_u_trunc-16.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-17.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-18.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-19.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-2.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-20.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-21.c | 21 ++- .../rvv/autovec/unop/vec_sat_u_trunc-22.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-23.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-24.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-3.c | 21 ++- .../rvv/autovec/unop/vec_sat_u_trunc-4.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-5.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-6.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-7.c | 13 ++-- .../rvv/autovec/unop/vec_sat_u_trunc-8.c | 17 ++- .../rvv/autovec/unop/vec_sat_u_trunc-9.c | 21 ++- 24 files changed, 46 insertions(+), 328 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c index 186005733ec..3d29d26abff 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -1,18 +1,9 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ -/* { dg-skip-if "" { *-*-* } { "-flto" } } */ -/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */ #include "../vec_sat_arith.h" -/* -** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: -** ... -** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma -** ... -** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 -** ... -*/ DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) /* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclipu\.wi} 1 } } */ diff --git a/gcc/te
[PATCH v1 1/3] RISC-V: Refine the testcase of vector SAT_ADD
From: Pan Li Take scan-assembler-times for vsadd insn check instead of function body, as we only care about if we can generate the fixed point insn vsadd. The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: Ditto. 
Signed-off-by: Pan Li --- .../riscv/rvv/autovec/binop/vec_sat_s_add-1.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_s_add-2.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_s_add-3.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_s_add-4.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-10.c | 5 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-11.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-12.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-13.c | 12 +--- .../riscv/rvv/autovec/binop/vec_sat_u_add-14.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_add-15.c
[PATCH v1 3/3] RISC-V: Add testcases for form 1 of scalar signed SAT_SUB
From: Pan Li Form 1: #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_sub_##T##_fmt_1 (T x, T y) \ {\ T minus = (UT)x - (UT)y; \ return (x ^ y) >= 0\ ? minus \ : (minus ^ x) >= 0 \ ? minus\ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_arith_data.h: Add test data for SAT_SUB. * gcc.target/riscv/sat_s_sub-1-i16.c: New test. * gcc.target/riscv/sat_s_sub-1-i32.c: New test. * gcc.target/riscv/sat_s_sub-1-i64.c: New test. * gcc.target/riscv/sat_s_sub-1-i8.c: New test. * gcc.target/riscv/sat_s_sub-run-1-i16.c: New test. * gcc.target/riscv/sat_s_sub-run-1-i32.c: New test. * gcc.target/riscv/sat_s_sub-run-1-i64.c: New test. * gcc.target/riscv/sat_s_sub-run-1-i8.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 17 + .../gcc.target/riscv/sat_arith_data.h | 73 +++ .../gcc.target/riscv/sat_s_sub-1-i16.c| 30 .../gcc.target/riscv/sat_s_sub-1-i32.c| 28 +++ .../gcc.target/riscv/sat_s_sub-1-i64.c| 27 +++ .../gcc.target/riscv/sat_s_sub-1-i8.c | 28 +++ .../gcc.target/riscv/sat_s_sub-run-1-i16.c| 16 .../gcc.target/riscv/sat_s_sub-run-1-i32.c| 16 .../gcc.target/riscv/sat_s_sub-run-1-i64.c| 16 .../gcc.target/riscv/sat_s_sub-run-1-i8.c | 16 10 files changed, 267 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i32.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i64.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i32.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i64.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_sub-run-1-i8.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index a2617b6db70..587f3f8348c 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -353,6 +353,23 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\ return x > IMM ? x - IMM : 0; \ } +#define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \ +T __attribute__((noinline)) \ +sat_s_sub_##T##_fmt_1 (T x, T y) \ +{\ + T minus = (UT)x - (UT)y; \ + return (x ^ y) >= 0\ +? minus \ +: (minus ^ x) >= 0 \ + ? minus\ + : x < 0 ? 
MIN : MAX; \ +} +#define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \ + DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) + +#define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y) +#define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y) + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h index 75037c5d806..39a1e17cd3d 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h @@ -37,6 +37,11 @@ TEST_BINARY_STRUCT (int16_t, ssadd) TEST_BINARY_STRUCT (int32_t, ssadd) TEST_BINARY_STRUCT (int64_t, ssadd) +TEST_BINARY_STRUCT (int8_t, sssub) +TEST_BINARY_STRUCT (int16_t, sssub) +TEST_BINARY_STRUCT (int32_t, sssub) +TEST_BINARY_STRUCT (int64_t, sssub) + TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ TEST_UNARY_DATA(uint8_t, uint16_t)[] = { @@ -189,4 +194,72 @@ TEST_BINARY_STRUCT_DECL(int64_t, ssadd) TEST_BINARY_DATA(int64_t, ssadd)[] = { -9223372036854775803ll, 9223372036854775805ll, 2}, }; +TEST_BINARY_STRUCT_DECL(int8_t, sssub) TEST_BINARY_DATA(int8_t, sssub)[] = +{ + { 0,0,0}, + { 2,4, -2}, + { 126, -1, 127}, + { 127, -1, 127}, + { 127, -127, 127}, + { -7, -4, -3}, +
[PATCH v1 2/3] RISC-V: Implement scalar SAT_SUB for signed integer
From: Pan Li This patch would like to implement the sssub form 1. Aka: Form 1: #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_sub_##T##_fmt_1 (T x, T y) \ {\ T minus = (UT)x - (UT)y; \ return (x ^ y) >= 0\ ? minus \ : (minus ^ x) >= 0 \ ? minus\ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) Before this patch: 10 │ sat_s_sub_int8_t_fmt_1: 11 │ subwa5,a0,a1 12 │ slliw a5,a5,24 13 │ sraiw a5,a5,24 14 │ xor a1,a0,a1 15 │ xor a4,a0,a5 16 │ and a1,a1,a4 17 │ blt a1,zero,.L4 18 │ mv a0,a5 19 │ ret 20 │ .L4: 21 │ sraia0,a0,63 22 │ xoria5,a0,127 23 │ mv a0,a5 24 │ ret After this patch: 10 │ sat_s_sub_int8_t_fmt_1: 11 │ sub a4,a0,a1 12 │ xor a5,a0,a4 13 │ xor a1,a0,a1 14 │ and a5,a5,a1 15 │ srlia5,a5,7 16 │ andia5,a5,1 17 │ sraia0,a0,63 18 │ xoria3,a0,127 19 │ neg a0,a5 20 │ addia5,a5,-1 21 │ and a3,a3,a0 22 │ and a0,a4,a5 23 │ or a0,a0,a3 24 │ slliw a0,a0,24 25 │ sraiw a0,a0,24 26 │ ret The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_sssub): Add new func decl for expanding signed SAT_SUB. * config/riscv/riscv.cc (riscv_expand_sssub): Add new func impl for expanding signed SAT_SUB. * config/riscv/riscv.md (sssub3): Add new pattern sssub for scalar signed integer. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv.cc | 69 + gcc/config/riscv/riscv.md | 11 ++ 3 files changed, 81 insertions(+) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 07a4d42e3a5..3d8775e582d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -136,6 +136,7 @@ extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); extern void riscv_expand_usadd (rtx, rtx, rtx); extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); +extern void riscv_expand_sssub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); #ifdef RTX_CODE diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 7be3939a7f9..8708a7b42c6 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -12329,6 +12329,75 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Implements the signed saturation sub standard name ssadd for int mode. + + z = SAT_SUB(x, y). + => + 1. minus = x - y + 2. xor_0 = x ^ y + 3. xor_1 = x ^ minus + 4. lt_0 = xor_1 < 0 + 5. lt_1 = xor_0 < 0 + 6. and = lt_0 & lt_1 + 7. lt = x < 0 + 8. neg = -lt + 9. max = INT_MAX + 10. max = max ^ neg + 11. neg = -and + 12. max = max & neg + 13. and = and - 1 + 14. z = minus & and + 15. 
z = z | max */ + +void +riscv_expand_sssub (rtx dest, rtx x, rtx y) +{ + machine_mode mode = GET_MODE (dest); + unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant (); + rtx shift_bits = GEN_INT (bitsize - 1); + rtx xmode_x = gen_lowpart (Xmode, x); + rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_minus = gen_reg_rtx (Xmode); + rtx xmode_xor_0 = gen_reg_rtx (Xmode); + rtx xmode_xor_1 = gen_reg_rtx (Xmode); + rtx xmode_lt_0 = gen_reg_rtx (Xmode); + rtx xmode_lt_1 = gen_reg_rtx (Xmode); + rtx xmode_and = gen_reg_rtx (Xmode); + rtx xmode_lt = gen_reg_rtx (Xmode); + rtx xmode_neg = gen_reg_rtx (Xmode); + rtx xmode_max = gen_reg_rtx (Xmode); + rtx xmode_dest = gen_reg_rtx (Xmode); + + /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus. */ + riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y); + riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y); + riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus); + + /* Step-2: and = xor_0 < 0 & xor_1 < 0. */ + riscv_emit_binary (LSHIFTRT, xmode_lt_0, xmode_xor_0, shift_bits); + riscv_emit_binary (LSHIFTRT, xmode_lt_1, xmode_xor_1, shift_bits); + riscv_emit_binary (AND, xmode_and, xmode_lt_0, xmode_lt_1); + riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode)); + + /* Step-3: lt = x < 0, neg = -lt. */ + riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode)); + riscv_emit_unary (NEG, xmode_neg, xmode_lt); + + /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg. */ + riscv_emit_move (xmode_max
[PATCH v1 1/3] Match: Support form 1 for scalar signed integer SAT_SUB
From: Pan Li This patch adds support for form 1 of the scalar signed integer SAT_SUB, as in the example below: Form 1: #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_sub_##T##_fmt_1 (T x, T y) \ {\ T minus = (UT)x - (UT)y; \ return (x ^ y) >= 0\ ? minus \ : (minus ^ x) >= 0 \ ? minus\ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_sub_int8_t_fmt_1 (int8_t x, int8_t y) 6 │ { 7 │ int8_t minus; 8 │ unsigned char x.0_1; 9 │ unsigned char y.1_2; 10 │ unsigned char _3; 11 │ signed char _4; 12 │ signed char _5; 13 │ int8_t _6; 14 │ _Bool _11; 15 │ signed char _12; 16 │ signed char _13; 17 │ signed char _14; 18 │ signed char _15; 19 │ 20 │ ;; basic block 2, loop depth 0 21 │ ;;pred: ENTRY 22 │ x.0_1 = (unsigned char) x_7(D); 23 │ y.1_2 = (unsigned char) y_8(D); 24 │ _3 = x.0_1 - y.1_2; 25 │ minus_9 = (int8_t) _3; 26 │ _4 = x_7(D) ^ y_8(D); 27 │ _5 = x_7(D) ^ minus_9; 28 │ _15 = _4 & _5; 29 │ if (_15 < 0) 30 │ goto <bb 3>; [41.00%] 31 │ else 32 │ goto <bb 4>; [59.00%] 33 │ ;;succ: 3 34 │ ;;4 35 │ 36 │ ;; basic block 3, loop depth 0 37 │ ;;pred: 2 38 │ _11 = x_7(D) < 0; 39 │ _12 = (signed char) _11; 40 │ _13 = -_12; 41 │ _14 = _13 ^ 127; 42 │ ;;succ: 4 43 │ 44 │ ;; basic block 4, loop depth 0 45 │ ;;pred: 2 46 │ ;;3 47 │ # _6 = PHI <minus_9(2), _14(3)> 48 │ return _6; 49 │ ;;succ: EXIT 50 │ 51 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_sub_int8_t_fmt_1 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_SUB (x_7(D), y_8(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The test suites below pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add case 1 matching pattern for signed SAT_SUB. 
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_sub): Add new decl for generated SAT_SUB matching func. (match_unsigned_saturation_sub): Rename from... (match_saturation_sub): ...Rename to and add signed SAT_SUB matching. (math_opts_dom_walker::after_dom_children): Leverage the named match func for both the unsigned and signed SAT_SUB. Signed-off-by: Pan Li --- gcc/match.pd | 14 ++ gcc/tree-ssa-math-opts.cc | 8 +--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 940292d0d49..63f7f3142c4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3358,6 +3358,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (wi::eq_p (sum, wi::uhwi (0, precision))) +/* Signed saturation sub, case 1: + T minus = (T)((UT)X - (UT)Y); + SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus; + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_sub @0 @1) + (cond^ (lt (bit_and:c (bit_xor:c @0 @1) + (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) +(nop_convert @1) + integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type + /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). 
*/ (match (unsigned_integer_sat_trunc @0) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index d61668aacfc..f04b17101db 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4024,6 +4024,7 @@ extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree)); static void build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, @@ -4162,7 +4163,7 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) * [local count: 1073741824]: * _1 = .SAT_SUB (x_2(D), y_3(D)); */ static void -match_unsigned_sat
[PATCH v3] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking
From: Pan Li This patch would like to fix the following ICE for -O2 -m32 of x86_64. during RTL pass: expand JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned int)': JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in expand_fn_using_insn, at internal-fn.cc:263 3 | void DequeueEvent(unsigned frame) { | ^~~~ 0x27b580d diagnostic_context::diagnostic_impl(rich_location*, diagnostic_metadata const*, diagnostic_option_id, char const*, __va_list_tag (*) [1], diagnostic_t) ???:0 0x27c4a3f internal_error(char const*, ...) ???:0 0x27b3994 fancy_abort(char const*, int, char const*) ???:0 0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int) ???:0 0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int) ???:0 0xf2c87c expand_SAT_SUB(internal_fn, gcall*) ???:0 We allowed the operand convert when matching SAT_SUB in match.pd, to support the zip benchmark SAT_SUB pattern. Aka, (convert? (minus (convert1? @0) (convert1? @1))) for below sample code. void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; register uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB } while (--n); } The pattern match for SAT_SUB itself may also act on below scalar sample code too. unsigned long long GetTimeFromFrames(int); unsigned long long GetMicroSeconds(); void DequeueEvent(unsigned frame) { long long frame_time = GetTimeFromFrames(frame); unsigned long long current_time = GetMicroSeconds(); DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); } Aka: uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t); Then there will be a problem when ia32 or -m32 is given when compiling. Because we only check the lhs (aka uint32_t) type is supported by ifn instead of the operand (aka uint64_t). Mostly DImode is disabled for 32 bits target like ia32 or rv32gcv, and then trigger ICE when expanding. The below test suites are passed for this patch. 
* The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. PR middle-end/116814 gcc/ChangeLog: * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Make ifn is_supported type check based on operand instead of lhs. gcc/testsuite/ChangeLog: * g++.dg/torture/pr116814-1.C: New test. Signed-off-by: Pan Li --- gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 gcc/tree-ssa-math-opts.cc | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C b/gcc/testsuite/g++.dg/torture/pr116814-1.C new file mode 100644 index 000..dd6f29daa7c --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2" } */ + +unsigned long long GetTimeFromFrames(int); +unsigned long long GetMicroSeconds(); + +void DequeueEvent(unsigned frame) { + long long frame_time = GetTimeFromFrames(frame); + unsigned long long current_time = GetMicroSeconds(); + + DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index d61668aacfc..8c622514dbd 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4042,7 +4042,7 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, internal_fn fn, tree lhs, tree op_0, tree op_1) { - if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) + if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH)) { gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); gimple_call_set_lhs (call, lhs); -- 2.43.0
[PATCH v2] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking
From: Pan Li This patch would like to fix the following ICE for -O2 -m32 of x86_64. during RTL pass: expand JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned int)': JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in expand_fn_using_insn, at internal-fn.cc:263 3 | void DequeueEvent(unsigned frame) { | ^~~~ 0x27b580d diagnostic_context::diagnostic_impl(rich_location*, diagnostic_metadata const*, diagnostic_option_id, char const*, __va_list_tag (*) [1], diagnostic_t) ???:0 0x27c4a3f internal_error(char const*, ...) ???:0 0x27b3994 fancy_abort(char const*, int, char const*) ???:0 0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int) ???:0 0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int) ???:0 0xf2c87c expand_SAT_SUB(internal_fn, gcall*) ???:0 We allowed the operand convert when matching SAT_SUB in match.pd, to support the zip benchmark SAT_SUB pattern. Aka, (convert? (minus (convert1? @0) (convert1? @1))) for below sample code. void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; register uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB } while (--n); } The pattern match for SAT_SUB itself may also act on below scalar sample code too. unsigned long long GetTimeFromFrames(int); unsigned long long GetMicroSeconds(); void DequeueEvent(unsigned frame) { long long frame_time = GetTimeFromFrames(frame); unsigned long long current_time = GetMicroSeconds(); DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); } Aka: uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t); Then there will be a problem when ia32 or -m32 is given when compiling. Because we only check the lhs (aka uint32_t) type is supported by ifn and missed the operand (aka uint64_t). Mostly DImode is disabled for 32 bits target like ia32 or rv32gcv, and then trigger ICE when expanding. The below test suites are passed for this patch. 
* The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. PR middle-end/116814 gcc/ChangeLog: * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add ifn is_supported check for operand TREE type. gcc/testsuite/ChangeLog: * g++.dg/torture/pr116814-1.C: New test. Signed-off-by: Pan Li --- gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 gcc/tree-ssa-math-opts.cc | 23 +++ 2 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C b/gcc/testsuite/g++.dg/torture/pr116814-1.C new file mode 100644 index 000..dd6f29daa7c --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2" } */ + +unsigned long long GetTimeFromFrames(int); +unsigned long long GetMicroSeconds(); + +void DequeueEvent(unsigned frame) { + long long frame_time = GetTimeFromFrames(frame); + unsigned long long current_time = GetMicroSeconds(); + + DequeueEvent(frame_time < current_time ? 
0 : frame_time - current_time); +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index d61668aacfc..361761cedef 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4042,15 +4042,22 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, internal_fn fn, tree lhs, tree op_0, tree op_1) { - if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) -{ - gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); - gimple_call_set_lhs (call, lhs); - gsi_insert_before (gsi, call, GSI_SAME_STMT); + tree lhs_type = TREE_TYPE (lhs); + tree op_type = TREE_TYPE (op_0); - gimple_stmt_iterator psi = gsi_for_stmt (phi); - remove_phi_node (&psi, /* release_lhs_p */ false); -} + if (!direct_internal_fn_supported_p (fn, lhs_type, OPTIMIZE_FOR_BOTH)) +return; + + if (lhs_type != op_type + && !direct_internal_fn_supported_p (fn, op_type, OPTIMIZE_FOR_BOTH)) +return; + + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + gimple_call_set_lhs (call, lhs); + gsi_insert_before (gsi, call, GSI_SAME_STMT); + + gimple_stmt_iterator psi = gsi_for_stmt (phi); + remove_phi_node (&psi, /* release_lhs_p */ false); } /* -- 2.43.0
[PATCH v1] RISC-V: Fix incorrect test macro for signed scalar SAT_ADD form 2 run test
From: Pan Li This patch would like to fix one incorrect test macro usage for form 2 of signed scalar SAT_ADD run test. It should leverage the _FMT_2 instead of _FMT_1 for form 2. The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_s_add-run-5.c: Take form 2 for run test. * gcc.target/riscv/sat_s_add-run-6.c: Ditto. * gcc.target/riscv/sat_s_add-run-7.c: Ditto. * gcc.target/riscv/sat_s_add-run-8.c: Ditto. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 2 ++ gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c | 4 ++-- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index a2617b6db70..77b5ef1807b 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -141,6 +141,8 @@ sat_s_add_##T##_fmt_2 (T x, T y) \ return sum; \ return x < 0 ? 
MIN : MAX; \ } +#define DEF_SAT_S_ADD_FMT_2_WRAP(T, UT, MIN, MAX) \ + DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c index 9a4ce338d0c..d57e0a0d195 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c @@ -7,10 +7,10 @@ #define T1 int8_t #define T2 uint8_t -DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT8_MIN, INT8_MAX) +DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT8_MIN, INT8_MAX) #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd) #define TTEST_BINARY_STRUCT_DECL(T1, ssadd) -#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y) +#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y) #include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c index 34459b85e2b..cdac5bdb883 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c @@ -7,10 +7,10 @@ #define T1 int16_t #define T2 uint16_t -DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT16_MIN, INT16_MAX) +DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT16_MIN, INT16_MAX) #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd) #define TTEST_BINARY_STRUCT_DECL(T1, ssadd) -#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y) +#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y) #include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c index 4d4841f4066..4ac952e27fa 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c @@ -7,10 +7,10 @@ #define T1 int32_t #define T2 uint32_t -DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT32_MIN, INT32_MAX) +DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT32_MIN, INT32_MAX) #define DATA 
TEST_BINARY_DATA_WRAP(T1, ssadd) #define TTEST_BINARY_STRUCT_DECL(T1, ssadd) -#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y) +#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y) #include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c index df818879628..4d25e7f171d 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c @@ -7,10 +7,10 @@ #define T1 int64_t #define T2 uint64_t -DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT64_MIN, INT64_MAX) +DEF_SAT_S_ADD_FMT_2_WRAP(T1, T2, INT64_MIN, INT64_MAX) #define DATA TEST_BINARY_DATA_WRAP(T1, ssadd) #define TTEST_BINARY_STRUCT_DECL(T1, ssadd) -#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y) +#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_2_WRAP(T1, x, y) #include "scalar_sat_binary_run_xxx.h" -- 2.43.0
[PATCH v1] Widening-Mul: Fix one ICE for SAT_SUB matching operand promotion
From: Pan Li This patch would like to fix the following ICE for -O2 -m32 of x86_64. during RTL pass: expand JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned int)': JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in expand_fn_using_insn, at internal-fn.cc:263 3 | void DequeueEvent(unsigned frame) { | ^~~~ 0x27b580d diagnostic_context::diagnostic_impl(rich_location*, diagnostic_metadata const*, diagnostic_option_id, char const*, __va_list_tag (*) [1], diagnostic_t) ???:0 0x27c4a3f internal_error(char const*, ...) ???:0 0x27b3994 fancy_abort(char const*, int, char const*) ???:0 0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int) ???:0 0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int) ???:0 0xf2c87c expand_SAT_SUB(internal_fn, gcall*) ???:0 We allowed the operand convert when matching SAT_SUB in match.pd, to support the zip benchmark SAT_SUB pattern. Aka, (convert? (minus (convert1? @0) (convert1? @1))) for below sample code. void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; register uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB } while (--n); } The pattern match for SAT_SUB itself may also act on below scalar sample code too. unsigned long long GetTimeFromFrames(int); unsigned long long GetMicroSeconds(); void DequeueEvent(unsigned frame) { long long frame_time = GetTimeFromFrames(frame); unsigned long long current_time = GetMicroSeconds(); DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); } Aka: uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t); Then there will be a problem when ia32 or -m32 is given when compiling. Because we only check the lhs (aka uint32_t) type is supported by ifn and missed the operand (aka uint64_t). Mostly DImode is disabled for 32 bits target like ia32 or rv32gcv, and then trigger ICE when expanding. The below test suites are passed for this patch. 
* The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. PR target/116814 gcc/ChangeLog: * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add ifn is_supported check for operand TREE type. gcc/testsuite/ChangeLog: * g++.dg/torture/pr116814-1.C: New test. Signed-off-by: Pan Li --- gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 gcc/tree-ssa-math-opts.cc | 23 +++ 2 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr116814-1.C diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C b/gcc/testsuite/g++.dg/torture/pr116814-1.C new file mode 100644 index 000..8db5b020cfd --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C @@ -0,0 +1,12 @@ +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 -m32" } */ + +unsigned long long GetTimeFromFrames(int); +unsigned long long GetMicroSeconds(); + +void DequeueEvent(unsigned frame) { + long long frame_time = GetTimeFromFrames(frame); + unsigned long long current_time = GetMicroSeconds(); + + DequeueEvent(frame_time < current_time ? 
0 : frame_time - current_time); +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index d61668aacfc..361761cedef 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4042,15 +4042,22 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, internal_fn fn, tree lhs, tree op_0, tree op_1) { - if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) -{ - gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); - gimple_call_set_lhs (call, lhs); - gsi_insert_before (gsi, call, GSI_SAME_STMT); + tree lhs_type = TREE_TYPE (lhs); + tree op_type = TREE_TYPE (op_0); - gimple_stmt_iterator psi = gsi_for_stmt (phi); - remove_phi_node (&psi, /* release_lhs_p */ false); -} + if (!direct_internal_fn_supported_p (fn, lhs_type, OPTIMIZE_FOR_BOTH)) +return; + + if (lhs_type != op_type + && !direct_internal_fn_supported_p (fn, op_type, OPTIMIZE_FOR_BOTH)) +return; + + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + gimple_call_set_lhs (call, lhs); + gsi_insert_before (gsi, call, GSI_SAME_STMT); + + gimple_stmt_iterator psi = gsi_for_stmt (phi); + remove_phi_node (&psi, /* release_lhs_p */ false); } /* -- 2.43.0
[PATCH v1] RISC-V: Add testcases for form 4 of signed vector SAT_ADD
From: Pan Li Form 4: #define DEF_VEC_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ out[i] = !overflow ? sum : x < 0 ? MIN : MAX; \ }\ } DEF_VEC_SAT_S_ADD_FMT_4 (int8_t, uint8_t, INT8_MIN, INT8_MAX) The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-16.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/binop/vec_sat_s_add-13.c | 9 .../rvv/autovec/binop/vec_sat_s_add-14.c | 9 .../rvv/autovec/binop/vec_sat_s_add-15.c | 9 .../rvv/autovec/binop/vec_sat_s_add-16.c | 9 .../rvv/autovec/binop/vec_sat_s_add-run-13.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-14.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-15.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-16.c | 17 ++ .../riscv/rvv/autovec/vec_sat_arith.h | 22 +++ 9 files changed, 126 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c new file mode 100644 index 000..ec3f8aee434 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_4(int8_t, uint8_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c new file mode 100644 index 000..5542616c90a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_4(int16_t, uint16_t, INT16_MIN, INT16_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c new file mode 100644 index 000..091bfd15edf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march
[PATCH v1] Genmatch: Fix ICE for binary phi cfg mismatching [PR116795]
From: Pan Li This patch fixes an ICE when trying to match the binary phi for the cfg below. We checked that the first incoming edge of the Phi block comes from b0, instead of checking that the only incoming edge of b1 comes from b0. As a result, some code was recognized as .SAT_SUB when it is not, which finally led to the verify_ssa failure.
  +------+
  | b0:  |
  | def  |       +-----+
  | ...  |       | b1: |
  | cond |------>| def |
  +------+       | ... |
     |           +-----+
     |              |
     |              |
     v              |
  +-----+           |
  | b2: |           |
  | Phi |<----------+
  +-----+
The test suites below pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. PR target/116795 gcc/ChangeLog: * gimple-match-head.cc (match_cond_with_binary_phi): Fix the incorrect cfg check as b0->b1 in above example. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116795-1.c: New test. Signed-off-by: Pan Li --- gcc/gimple-match-head.cc | 2 +- gcc/testsuite/gcc.dg/torture/pr116795-1.c | 14 ++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr116795-1.c diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index b63b66e9485..b5d4a71ddc5 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -402,7 +402,7 @@ match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg) if (EDGE_COUNT (pred_b0->succs) == 2 && EDGE_COUNT (pred_b1->succs) == 1 && EDGE_COUNT (pred_b1->preds) == 1 - && pred_b0 == EDGE_PRED (gimple_bb (phi), 0)->src) + && pred_b0 == EDGE_PRED (pred_b1, 0)->src) /* * +--+ * | b0: | diff --git a/gcc/testsuite/gcc.dg/torture/pr116795-1.c b/gcc/testsuite/gcc.dg/torture/pr116795-1.c new file mode 100644 index 000..629bdf4bacd --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116795-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +volatile int a, b; +int c; +int main() { + unsigned e = 0; + for (; e < 2; e++) { +a && b; +if (c) + e = -(c ^ e); + } + return 0; +} -- 2.43.0
[PATCH v1 2/2] RISC-V: Add testcases for form 3 of signed vector SAT_ADD
From: Pan Li Form 3: #define DEF_VEC_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ out[i] = overflow ? x < 0 ? MIN : MAX : sum; \ }\ } DEF_VEC_SAT_S_ADD_FMT_3 (int8_t, uint8_t, INT8_MIN, INT8_MAX) The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-9.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-9.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/binop/vec_sat_s_add-10.c | 9 .../rvv/autovec/binop/vec_sat_s_add-11.c | 9 .../rvv/autovec/binop/vec_sat_s_add-12.c | 9 .../riscv/rvv/autovec/binop/vec_sat_s_add-9.c | 9 .../rvv/autovec/binop/vec_sat_s_add-run-10.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-11.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-12.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-9.c | 17 ++ .../riscv/rvv/autovec/vec_sat_arith.h | 22 +++ 9 files changed, 126 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c new file mode 100644 index 000..5dfecdb1732 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-10.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_3(int16_t, uint16_t, INT16_MIN, INT16_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c new file mode 100644 index 000..ebf825e0dd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-11.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_3(int32_t, uint32_t, INT32_MIN, INT32_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c new file mode 100644 index 000..82b29a089f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-12.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march
[PATCH v1 1/2] Match: Support form 3 for vector signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 3 of the vector signed integer .SAT_ADD. Aka below example: Form 3: #define DEF_VEC_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ out[i] = overflow ? x < 0 ? MIN : MAX : sum; \ }\ } DEF_VEC_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) Before this patch: 40 │ # ivtmp.7_34 = PHI <0(3), ivtmp.7_30(7)> 41 │ _26 = op_1_12(D) + ivtmp.7_34; 42 │ x_29 = MEM[(int8_t *)_26]; 43 │ _1 = op_2_14(D) + ivtmp.7_34; 44 │ y_24 = MEM[(int8_t *)_1]; 45 │ _9 = .ADD_OVERFLOW (y_24, x_29); 46 │ _7 = IMAGPART_EXPR <_9>; 47 │ if (_7 != 0) 48 │ goto ; [50.00%] 49 │ else 50 │ goto ; [50.00%] 51 │ ;;succ: 6 52 │ ;;5 53 │ 54 │ ;; basic block 5, loop depth 1 55 │ ;;pred: 4 56 │ _42 = REALPART_EXPR <_9>; 57 │ _2 = out_17(D) + ivtmp.7_34; 58 │ MEM[(int8_t *)_2] = _42; 59 │ ivtmp.7_27 = ivtmp.7_34 + 1; 60 │ if (_13 != ivtmp.7_27) 61 │ goto ; [89.00%] 62 │ else 63 │ goto ; [11.00%] 64 │ ;;succ: 7 65 │ ;;8 66 │ 67 │ ;; basic block 6, loop depth 1 68 │ ;;pred: 4 69 │ _38 = x_29 < 0; 70 │ _39 = (signed char) _38; 71 │ _40 = -_39; 72 │ _41 = _40 ^ 127; 73 │ _33 = out_17(D) + ivtmp.7_34; 74 │ MEM[(int8_t *)_33] = _41; 75 │ ivtmp.7_25 = ivtmp.7_34 + 1; 76 │ if (_13 != ivtmp.7_25) After this patch: 77 │ _94 = .SELECT_VL (ivtmp_92, POLY_INT_CST [16, 16]); 78 │ vect_x_13.9_81 = .MASK_LEN_LOAD (vectp_op_1.7_79, 8B, { -1, ... }, _94, 0); 79 │ vect_y_15.12_85 = .MASK_LEN_LOAD (vectp_op_2.10_83, 8B, { -1, ... }, _94, 0); 80 │ vect_patt_49.13_86 = .SAT_ADD (vect_x_13.9_81, vect_y_15.12_85); 81 │ .MASK_LEN_STORE (vectp_out.14_88, 8B, { -1, ... 
}, _94, 0, vect_patt_49.13_86); 82 │ vectp_op_1.7_80 = vectp_op_1.7_79 + _94; 83 │ vectp_op_2.10_84 = vectp_op_2.10_83 + _94; 84 │ vectp_out.14_89 = vectp_out.14_88 + _94; 85 │ ivtmp_93 = ivtmp_92 - _94; The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add optional nop_convert for signed SAT_ADD case 4. Signed-off-by: Pan Li --- gcc/match.pd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 940292d0d49..c271a8e4c9d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3246,7 +3246,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ (match (signed_integer_sat_add @0 @1) (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) - (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (bit_xor:c (nop_convert? + (negate (nop_convert? (convert (lt @0 integer_zerop) + max_value) (realpart @2)) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 -- 2.43.0
[PATCH v1 1/2] Match: Support form 2 for vector signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 2 of the vector signed integer .SAT_ADD. Aka below example: Form 2: #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum = (UT)x + (UT)y; \ if ((x ^ y) < 0 || (sum ^ x) >= 0) \ out[i] = sum;\ else \ out[i] = x < 0 ? MIN : MAX; \ }\ } DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) Before this patch: 104 │ loop_len_79 = MIN_EXPR ; 105 │ _50 = &MEM [(int8_t *)vectp_op_1.9_77]; 106 │ vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0); 107 │ _70 = vect_x_18.11_80 >> 7; 108 │ vect_x.12_81 = VIEW_CONVERT_EXPR(vect_x_18.11_80); 109 │ _26 = (void *) ivtmp.47_20; 110 │ _27 = &MEM [(int8_t *)_26]; 111 │ vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0); 112 │ vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84; 113 │ mask__50.23_92 = vect__7.21_90 >= { 0, ... }; 114 │ vect_y.16_85 = VIEW_CONVERT_EXPR(vect_y_20.15_84); 115 │ vect__6.17_86 = vect_x.12_81 + vect_y.16_85; 116 │ vect_sum_21.18_87 = VIEW_CONVERT_EXPR(vect__6.17_86); 117 │ vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87; 118 │ mask__45.20_89 = vect__8.19_88 < { 0, ... }; 119 │ mask__44.24_93 = mask__45.20_89 & mask__50.23_92; 120 │ _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87); 121 │ _60 = (void *) ivtmp.49_6; 122 │ _61 = &MEM [(int8_t *)_60]; 123 │ .MASK_LEN_STORE (_61, 8B, { -1, ... 
}, loop_len_79, 0, _40); 124 │ vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16]; 125 │ ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16]; 126 │ ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16]; 127 │ ivtmp.51_98 = ivtmp.51_53; 128 │ ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600]; After this patch: 88 │ _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]); 89 │ vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0); 90 │ vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0); 91 │ vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94); 92 │ .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95); 93 │ vectp_op_1.9_89 = vectp_op_1.9_88 + _103; 94 │ vectp_op_2.12_93 = vectp_op_2.12_92 + _103; 95 │ vectp_out.16_98 = vectp_out.16_97 + _103; 96 │ ivtmp_102 = ivtmp_101 - _103; The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the case 3 for signed .SAT_ADD matching. Signed-off-by: Pan Li --- gcc/match.pd | 16 1 file changed, 16 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index fdb59ff0d44..940292d0d49 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 5: + T sum = (T)((UT)X + (UT)Y); + SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum; + + The T and UT are type pair like T=int8_t, UT=uint8_t. 
*/ +(match (signed_integer_sat_add @0 @1) + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0) +(nop_convert @1 + integer_zerop) + (bit_not (lt (bit_xor:c @0 @1) integer_zerop))) + (bit_xor:c (nop_convert (negate (nop_convert (convert + (lt @0 integer_zerop) + max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v1 2/2] RISC-V: Add testcases for form 2 of signed vector SAT_ADD
From: Pan Li Form 2: #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum = (UT)x + (UT)y; \ if ((x ^ y) < 0 || (sum ^ x) >= 0) \ out[i] = sum;\ else \ out[i] = x < 0 ? MIN : MAX; \ }\ } DEF_VEC_SAT_S_ADD_FMT_2 (int8_t, uint8_t, INT8_MIN, INT8_MAX) The below test passed for this patch. * The rv64gcv full regression test. This is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macro. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-8.c: New test. 
Signed-off-by: Pan Li --- .../riscv/rvv/autovec/binop/vec_sat_s_add-5.c | 9 +++ .../riscv/rvv/autovec/binop/vec_sat_s_add-6.c | 9 +++ .../riscv/rvv/autovec/binop/vec_sat_s_add-7.c | 9 +++ .../riscv/rvv/autovec/binop/vec_sat_s_add-8.c | 9 +++ .../rvv/autovec/binop/vec_sat_s_add-run-5.c | 17 + .../rvv/autovec/binop/vec_sat_s_add-run-6.c | 17 + .../rvv/autovec/binop/vec_sat_s_add-run-7.c | 17 + .../rvv/autovec/binop/vec_sat_s_add-run-8.c | 17 + .../riscv/rvv/autovec/vec_sat_arith.h | 24 +++ 9 files changed, 128 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-8.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c new file mode 100644 index 000..8cf0d06efdb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-5.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c new file mode 100644 index 000..a26d3943e27 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-6.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_ADD_FMT_2(int16_t, uint16_t, INT16_MIN, INT16_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-7.c new file mode 100644 index 000..4ef1351dd29 --- /dev/null +++ b/gcc/testsuite/gcc.targe
[PATCH v1 2/2] RISC-V: Add testcases for form 4 of signed scalar SAT_ADD
From: Pan Li Form 4: #define DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_4 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return !overflow ? sum : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_4 (int64_t, uint64_t, INT64_MIN, INT64_MAX) The below test passed for this patch. * The rv64gcv full regression test. This is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_s_add-13.c: New test. * gcc.target/riscv/sat_s_add-14.c: New test. * gcc.target/riscv/sat_s_add-15.c: New test. * gcc.target/riscv/sat_s_add-16.c: New test. * gcc.target/riscv/sat_s_add-run-13.c: New test. * gcc.target/riscv/sat_s_add-run-14.c: New test. * gcc.target/riscv/sat_s_add-run-15.c: New test. * gcc.target/riscv/sat_s_add-run-16.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 gcc/testsuite/gcc.target/riscv/sat_s_add-13.c | 30 + gcc/testsuite/gcc.target/riscv/sat_s_add-14.c | 32 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-15.c | 31 ++ gcc/testsuite/gcc.target/riscv/sat_s_add-16.c | 29 + .../gcc.target/riscv/sat_s_add-run-13.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-14.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-15.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-16.c | 16 ++ 9 files changed, 200 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-16.c 
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index ab141bb1779..a2617b6db70 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -153,6 +153,17 @@ sat_s_add_##T##_fmt_3 (T x, T y) \ #define DEF_SAT_S_ADD_FMT_3_WRAP(T, UT, MIN, MAX) \ DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) +#define DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) \ +T __attribute__((noinline))\ +sat_s_add_##T##_fmt_4 (T x, T y) \ +{ \ + T sum; \ + bool overflow = __builtin_add_overflow (x, y, &sum); \ + return !overflow ? sum : x < 0 ? MIN : MAX; \ +} +#define DEF_SAT_S_ADD_FMT_4_WRAP(T, UT, MIN, MAX) \ + DEF_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) + #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y) #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y) @@ -162,6 +173,9 @@ sat_s_add_##T##_fmt_3 (T x, T y) \ #define RUN_SAT_S_ADD_FMT_3(T, x, y) sat_s_add_##T##_fmt_3(x, y) #define RUN_SAT_S_ADD_FMT_3_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_3(T, x, y) +#define RUN_SAT_S_ADD_FMT_4(T, x, y) sat_s_add_##T##_fmt_4(x, y) +#define RUN_SAT_S_ADD_FMT_4_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_4(T, x, y) + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c new file mode 100644 index 000..0923764cde4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-13.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_s_add_int8_t_fmt_4: +** add\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** 
and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** xori\s+[atx][0-9]+,\s*[atx][0-
[PATCH v1 1/2] RISC-V: Add testcases for form 3 of signed scalar SAT_ADD
From: Pan Li This patch adds testcases for form 3 of the signed scalar SAT_ADD, i.e.: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3 (int64_t, uint64_t, INT64_MIN, INT64_MAX) The below test passed for this patch. * The rv64gcv full regression test. This is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_s_add-10.c: New test. * gcc.target/riscv/sat_s_add-11.c: New test. * gcc.target/riscv/sat_s_add-12.c: New test. * gcc.target/riscv/sat_s_add-9.c: New test. * gcc.target/riscv/sat_s_add-run-10.c: New test. * gcc.target/riscv/sat_s_add-run-11.c: New test. * gcc.target/riscv/sat_s_add-run-12.c: New test. * gcc.target/riscv/sat_s_add-run-9.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 gcc/testsuite/gcc.target/riscv/sat_s_add-10.c | 32 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-11.c | 31 ++ gcc/testsuite/gcc.target/riscv/sat_s_add-12.c | 29 + gcc/testsuite/gcc.target/riscv/sat_s_add-9.c | 30 + .../gcc.target/riscv/sat_s_add-run-10.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-11.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-12.c | 16 ++ .../gcc.target/riscv/sat_s_add-run-9.c| 16 ++ 9 files changed, 200 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index b4fbf5dc662..ab141bb1779 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -142,12 +142,26 @@ sat_s_add_##T##_fmt_2 (T x, T y) \ return x < 0 ? MIN : MAX; \ } +#define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ +T __attribute__((noinline))\ +sat_s_add_##T##_fmt_3 (T x, T y) \ +{ \ + T sum; \ + bool overflow = __builtin_add_overflow (x, y, &sum); \ + return overflow ? x < 0 ? 
MIN : MAX : sum; \ +} +#define DEF_SAT_S_ADD_FMT_3_WRAP(T, UT, MIN, MAX) \ + DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) + #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y) #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y) #define RUN_SAT_S_ADD_FMT_2(T, x, y) sat_s_add_##T##_fmt_2(x, y) #define RUN_SAT_S_ADD_FMT_2_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_2(T, x, y) +#define RUN_SAT_S_ADD_FMT_3(T, x, y) sat_s_add_##T##_fmt_3(x, y) +#define RUN_SAT_S_ADD_FMT_3_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_3(T, x, y) + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c new file mode 100644 index 000..45329619f9d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-10.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_s_add_int16_t_fmt_3: +** add\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15 +** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** li\s+[atx][0-9]+,\s*32768 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +*
[PATCH v5 3/4] Match: Support form 3 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 3 of the scalar signed integer .SAT_ADD. Aka below example: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto ; [50.00%] 22 │ else 23 │ goto ; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto ; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _3; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call] 12 │ return _3; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the form 3 of signed .SAT_ADD matching. 
Signed-off-by: Pan Li --- gcc/match.pd | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 4aa610e2270..fdb59ff0d44 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3241,6 +3241,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type +/* Signed saturation add, case 4: + Z = .ADD_OVERFLOW (X, Y) + SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v5 4/4] RISC-V: Fix vector SAT_ADD dump check due to middle-end change
From: Pan Li This patch fixes the expected dump match counts of the vector SAT_ADD tests. The middle-end change increases the number of .SAT_ADD matches in the expand dump from 2 to 4. The below test suite passed for this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust the dump check times from 2 to 4. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. 
Signed-off-by: Pan Li --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c index c525ba97c52..47dd5012cc6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint8_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c index 41372d08e52..df8d5a8d275 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c @@ -15,4 +15,4 @@ */ 
DEF_VEC_SAT_U_ADD_FMT_6(uint16_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c index dddebb54426..f286bd10e4b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint32_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c index ad5162d10a0..307ff36cc35 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint64_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c index 39c20b3cea6..3218962724c 10064
[PATCH v5 2/4] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi
From: Pan Li This patch would like to leverage the match_cond_with_binary_phi to match the phi on cond, and get the true/false arg if matched. This helps a lot to simplify the implementation of gen_phi_on_cond. Before this patch: basic_block _b1 = gimple_bb (_a1); if (gimple_phi_num_args (_a1) == 2) { basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1; basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1; gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1)); if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 && EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_PRED (_other_db_1, 0)->src == _db_1) { tree _cond_lhs_1 = gimple_cond_lhs (_ct_1); tree _cond_rhs_1 = gimple_cond_rhs (_ct_1); tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE; tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1); tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0); ... After this patch: basic_block _b1 = gimple_bb (_a1); tree _p1, _p2; gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2); if (_cond_1 && _p1 && _p2) { tree _cond_lhs_1 = gimple_cond_lhs (_cond_1); tree _cond_rhs_1 = gimple_cond_rhs (_cond_1); tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); ... The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the match_cond_with_binary_phi API to get cond gimple, true and false TREE arg. 
Signed-off-by: Pan Li --- gcc/genmatch.cc | 67 +++-- 1 file changed, 15 insertions(+), 52 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index f1ff1d18265..149458fffe1 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) void dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) { - fprintf_indent (f, indent, -"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); - - fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); + char opname_0[20]; + char opname_1[20]; + char opname_2[20]; - indent += 2; - fprintf_indent (f, indent, "{\n"); - indent += 2; + gen_opname (opname_0, 0); + gen_opname (opname_1, 1); + gen_opname (opname_2, 2); fprintf_indent (f, indent, -"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _db_%d = safe_dyn_cast (*gsi_last_bb (_pb_0_%d)) ? " -"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); +"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); + fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2); fprintf_indent (f, indent, -"basic_block _other_db_%d = safe_dyn_cast " -"(*gsi_last_bb (_pb_0_%d)) ? 
_pb_1_%d : _pb_0_%d;\n", -depth, depth, depth, depth); +"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n", +depth, depth, opname_1, opname_2); - fprintf_indent (f, indent, -"gcond *_ct_%d = safe_dyn_cast (*gsi_last_bb (_db_%d));\n", -depth, depth); - fprintf_indent (f, indent, "if (_ct_%d" -" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); - fprintf_indent (f, indent, -" && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); - fprintf_indent (f, indent, -" && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); + fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n", +depth, opname_1, opname_2); indent += 2; fprintf_indent (f, indent, "{\n"); indent += 2; fprintf_indent (f, indent, -"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); +"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); - - char opname_0[20]; - char opname_1[20]; - char opname_2[20]; - gen_opname (opname_0, 0); - +"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree %s = build2 (gimple_cond_code (_ct_%d), " +"tree %s = build2 (gimple_cond_code (_cond_%d), " "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", opname_0, depth, depth, depth); - fprintf_indent (f, indent, -"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" -" & EDGE_TRUE_VALUE;\n", depth, depth); - - ge
[PATCH v5 1/4] Match: Add interface match_cond_with_binary_phi for true/false arg
From: Pan Li When matching the cond with 2 args phi node, we need to figure out which arg of phi node comes from the true edge of cond block, as well as the false edge. This patch would like to add interface to perform the action and return the true and false arg in TREE type. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * gimple-match-head.cc (match_cond_with_binary_phi): Add new func impl to match binary phi for true and false arg. Signed-off-by: Pan Li --- gcc/gimple-match-head.cc | 120 +++ 1 file changed, 120 insertions(+) diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index 924d3f1e710..b63b66e9485 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -375,3 +375,123 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, bool &wascmp, tree (*va return true; return false; } + +/* + * Return the relevant gcond * of the given phi, as well as the true + * and false TREE args of the phi. Or return nullptr. + * + * If matched the gcond *, the output argument TREE true_arg and false_arg + * will be updated to the relevant args of phi. + * + * If failed to match, nullptr gcond * will be returned, as well as the output + * arguments will be set to NULL_TREE. + */ + +static inline gcond * +match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg) +{ + *true_arg = *false_arg = NULL_TREE; + + if (gimple_phi_num_args (phi) != 2) +return nullptr; + + basic_block pred_b0 = EDGE_PRED (gimple_bb (phi), 0)->src; + basic_block pred_b1 = EDGE_PRED (gimple_bb (phi), 1)->src; + edge edge_for_pred_0 = nullptr; + + if (EDGE_COUNT (pred_b0->succs) == 2 + && EDGE_COUNT (pred_b1->succs) == 1 + && EDGE_COUNT (pred_b1->preds) == 1 + && pred_b0 == EDGE_PRED (gimple_bb (phi), 0)->src) +/* + * +--+ + * | b0: | + * | def | +-+ + * | ... | | b1: | + * | cond |-->| def | + * +--+ | ... 
| + *| +-+ + *# | + *| | + *v | + * +-+ | + * | b2: | | + * | def |<--+ + * +-+ + * #: edge_for_pred_0. + */ +edge_for_pred_0 = EDGE_PRED (gimple_bb (phi), 0); + else if (EDGE_COUNT (pred_b1->succs) == 2 + && EDGE_COUNT (pred_b0->succs) == 1 + && EDGE_COUNT (pred_b0->preds) == 1 + && pred_b1 == EDGE_PRED (pred_b0, 0)->src) +/* + *+--+ + *| b1: | + * +-+| def | + * | b0: || ... | + * | def |<---#---| cond | + * | ... |+--+ + * +-+ | + *| | + *| | + *| | + *v | + * +-+ | + * | b2: | | + * | def |<--+ + * +-+ + * #: edge_for_pred_0. + */ +edge_for_pred_0 = EDGE_PRED (pred_b0, 0); + else if (EDGE_COUNT (pred_b0->succs) == 1 + && EDGE_COUNT (pred_b1->succs) == 1 + && EDGE_COUNT (pred_b0->preds) == 1 + && EDGE_COUNT (pred_b1->preds) == 1 + && EDGE_COUNT (EDGE_PRED (pred_b0, 0)->src->succs) == 2 + && EDGE_PRED (pred_b0, 0)->src == EDGE_PRED (pred_b1, 0)->src) +/* +--+ + * | b0: | + * | ... | +-+ + * | cond |-->| b2: | + * +--+ | ... | + *| +-+ + *# | + *| | + *v | + * +-+ | + * | b1: | | + * | ... | | + * +-+ | + *| | + *| | + *v | + * +-+ | + * | b3: |<--+ + * | ... | + * +-+ + * #: edge_for_pred_0. + */ +edge_for_pred_0 = EDGE_PRED (pred_b0, 0); + + if (!edge_for_pred_0) +return nullptr; + + gcond *cond = safe_dyn_cast (*gsi_last_bb (edge_for_pred_0->src)); + + if (!cond) +return nullptr; + + if (edge_for_pred_0->flags & EDGE_TRUE_VALUE) +{ + *true_arg = gimple_phi_arg_def (phi, 0); + *false_arg = gimple_phi_arg_def (phi, 1); +} + else /* Aka edge_for_pred_0->flags & EDGE_FALSE_VALUE */ +{ + *false_arg = gimple_phi_arg_def (phi, 0); + *true_arg = gimple_phi_arg_def (phi, 1); +} + + return cond; +} -- 2.43.0
[PATCH v1] Match: Remove unnecessary types_match for case 1 of signed SAT_ADD
From: Pan Li Given that all commutative binary operators require matching types for both operands, remove the types_match check for case 1 of the signed SAT_ADD, because we have (bit_xor @0 @1), which ensures the operands have the correct TREE type. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Remove the types_match check for signed SAT_ADD case 1. Signed-off-by: Pan Li --- gcc/match.pd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 4cef965c9c7..5566c0e4c41 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3204,8 +3204,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) integer_zerop) (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) @2) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) - && types_match (type, @0, @1 + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type /* Signed saturation add, case 2: T sum = (T)((UT)X + (UT)Y) -- 2.43.0
[PATCH v1] RISC-V: Add testcases for form 2 of signed scalar SAT_ADD
From: Pan Li This patch would like to add testcases of the signed scalar SAT_ADD for form 2. Aka: Form 2: #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_2 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ if ((x ^ y) < 0 || (sum ^ x) >= 0) \ return sum; \ return x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_2 (int64_t, uint64_t, INT64_MIN, INT64_MAX) The below tests are passed for this patch. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_s_add-5.c: New test. * gcc.target/riscv/sat_s_add-6.c: New test. * gcc.target/riscv/sat_s_add-7.c: New test. * gcc.target/riscv/sat_s_add-8.c: New test. * gcc.target/riscv/sat_s_add-run-5.c: New test. * gcc.target/riscv/sat_s_add-run-6.c: New test. * gcc.target/riscv/sat_s_add-run-7.c: New test. * gcc.target/riscv/sat_s_add-run-8.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 13 gcc/testsuite/gcc.target/riscv/sat_s_add-5.c | 30 + gcc/testsuite/gcc.target/riscv/sat_s_add-6.c | 32 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-7.c | 31 ++ gcc/testsuite/gcc.target/riscv/sat_s_add-8.c | 29 + .../gcc.target/riscv/sat_s_add-run-5.c| 16 ++ .../gcc.target/riscv/sat_s_add-run-6.c| 16 ++ .../gcc.target/riscv/sat_s_add-run-7.c| 16 ++ .../gcc.target/riscv/sat_s_add-run-8.c| 16 ++ 9 files changed, 199 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-8.c diff --git
a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index a8672f66322..b4fbf5dc662 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -132,9 +132,22 @@ sat_s_add_##T##_fmt_1 (T x, T y) \ #define DEF_SAT_S_ADD_FMT_1_WRAP(T, UT, MIN, MAX) \ DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) +#define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ +T __attribute__((noinline)) \ +sat_s_add_##T##_fmt_2 (T x, T y) \ +{\ + T sum = (UT)x + (UT)y; \ + if ((x ^ y) < 0 || (sum ^ x) >= 0) \ +return sum; \ + return x < 0 ? MIN : MAX; \ +} + #define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y) #define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y) +#define RUN_SAT_S_ADD_FMT_2(T, x, y) sat_s_add_##T##_fmt_2(x, y) +#define RUN_SAT_S_ADD_FMT_2_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_2(T, x, y) + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c new file mode 100644 index 000..b644022eb4e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-5.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_s_add_int8_t_fmt_2: +** add\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*a1 +** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127 +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** 
and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slliw\s+a0,\s*a0,\s*24 +** sraiw\s+a0,\s*a0,\s*24 +** ret +*/ +DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-ti
[PATCH v1] RISC-V: Fix signed SAT_ADD test case for int64_t
From: Pan Li The int8_t test for signed SAT_ADD is sat_s_add-1.c, while sat_s_add-4.c should be for int64_t. Thus, update sat_s_add-4.c to test the int64_t type. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_s_add-4.c: Update test for int64_t instead of int8_t. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c index f85675c1a05..12c9540eaec 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c @@ -5,26 +5,25 @@ #include "sat_arith.h" /* -** sat_s_add_int8_t_fmt_1: +** sat_s_add_int64_t_fmt_1: ** add\s+[atx][0-9]+,\s*a0,\s*a1 ** xor\s+[atx][0-9]+,\s*a0,\s*a1 ** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ -** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 -** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 ** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ -** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 ** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 -** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127 +** li\s+[atx][0-9]+,\s*-1 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** neg\s+[atx][0-9]+,\s*[atx][0-9]+ ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+ -** slliw\s+a0,\s*a0,\s*24 -** sraiw\s+a0,\s*a0,\s*24 ** ret */ -DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) +DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) /* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ -- 2.43.0
[PATCH v4 4/4] RISC-V: Fix vector SAT_ADD dump check due to middle-end change
From: Pan Li This patch would like to fix the dump check times of vector SAT_ADD. The middle-end change increases the match times from 2 to 4. The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust the dump check times from 2 to 4. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. 
Signed-off-by: Pan Li --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c index c525ba97c52..47dd5012cc6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint8_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c index 41372d08e52..df8d5a8d275 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c @@ -15,4 +15,4 @@ */ 
DEF_VEC_SAT_U_ADD_FMT_6(uint16_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c index dddebb54426..f286bd10e4b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint32_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c index ad5162d10a0..307ff36cc35 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint64_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c index 39c20b3cea6..3218962724c 10064
[PATCH v4 3/4] Match: Support form 3 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support form 3 of the scalar signed integer .SAT_ADD. Aka the below example: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can tell the difference before and after this patch if the backend implements the ssadd3 pattern, as shown below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto <bb 4>; [50.00%] 22 │ else 23 │ goto <bb 3>; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto <bb 5>; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _3; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call] 12 │ return _3; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the form 3 of signed .SAT_ADD matching. 
Signed-off-by: Pan Li --- gcc/match.pd | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 4cef965c9c7..167b1b106dd 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3237,6 +3237,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type +/* Signed saturation add, case 4: + Z = .ADD_OVERFLOW (X, Y) + SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v4 2/4] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi
From: Pan Li This patch would like to leverage the match_cond_with_binary_phi to match the phi on cond, and get the true/false arg if matched. This helps a lot to simplify the implementation of gen_phi_on_cond. Before this patch: basic_block _b1 = gimple_bb (_a1); if (gimple_phi_num_args (_a1) == 2) { basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; basic_block _db_1 = safe_dyn_cast (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1; basic_block _other_db_1 = safe_dyn_cast (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1; gcond *_ct_1 = safe_dyn_cast (*gsi_last_bb (_db_1)); if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 && EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_PRED (_other_db_1, 0)->src == _db_1) { tree _cond_lhs_1 = gimple_cond_lhs (_ct_1); tree _cond_rhs_1 = gimple_cond_rhs (_ct_1); tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE; tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1); tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0); ... After this patch: basic_block _b1 = gimple_bb (_a1); tree _p1, _p2; gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2); if (_cond_1 && _p1 && _p2) { tree _cond_lhs_1 = gimple_cond_lhs (_cond_1); tree _cond_rhs_1 = gimple_cond_rhs (_cond_1); tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); ... The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the match_cond_with_binary_phi API to get cond gimple, true and false TREE arg. 
Signed-off-by: Pan Li --- gcc/genmatch.cc | 67 +++-- 1 file changed, 15 insertions(+), 52 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index a56bd90cb2c..e3d2ecc6266 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) void dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) { - fprintf_indent (f, indent, -"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); - - fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); + char opname_0[20]; + char opname_1[20]; + char opname_2[20]; - indent += 2; - fprintf_indent (f, indent, "{\n"); - indent += 2; + gen_opname (opname_0, 0); + gen_opname (opname_1, 1); + gen_opname (opname_2, 2); fprintf_indent (f, indent, -"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _db_%d = safe_dyn_cast (*gsi_last_bb (_pb_0_%d)) ? " -"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); +"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); + fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2); fprintf_indent (f, indent, -"basic_block _other_db_%d = safe_dyn_cast " -"(*gsi_last_bb (_pb_0_%d)) ? 
_pb_1_%d : _pb_0_%d;\n", -depth, depth, depth, depth); +"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n", +depth, depth, opname_1, opname_2); - fprintf_indent (f, indent, -"gcond *_ct_%d = safe_dyn_cast (*gsi_last_bb (_db_%d));\n", -depth, depth); - fprintf_indent (f, indent, "if (_ct_%d" -" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); - fprintf_indent (f, indent, -" && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); - fprintf_indent (f, indent, -" && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); + fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n", +depth, opname_1, opname_2); indent += 2; fprintf_indent (f, indent, "{\n"); indent += 2; fprintf_indent (f, indent, -"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); +"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); - - char opname_0[20]; - char opname_1[20]; - char opname_2[20]; - gen_opname (opname_0, 0); - +"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree %s = build2 (gimple_cond_code (_ct_%d), " +"tree %s = build2 (gimple_cond_code (_cond_%d), " "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", opname_0, depth, depth, depth); - fprintf_indent (f, indent, -"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" -" & EDGE_TRUE_VALUE;\n", depth, depth); - - ge
[PATCH v4 1/4] Match: Add interface match_cond_with_binary_phi for true/false arg
From: Pan Li When matching the cond with a 2-arg phi node, we need to figure out which arg of the phi node comes from the true edge of the cond block, and which from the false edge. This patch would like to add an interface to perform the action and return the true and false args in TREE type. There will be some additional handling if one of the args is INTEGER_CST, because an INTEGER_CST arg may have no source block of its own, and thus its edge source points to the condition block. See line 31 of the example below: the 255 INTEGER_CST has block 2 as source. Thus, we need to find the non-INTEGER_CST arg (aka _1) to tell which one is the true/false edge. For example, _1(3) takes block 3 as source, which is the dest of the false edge of the condition block. 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_add_imm_type_check_uint8_t_fmt_2 (uint8_t x) 6 │ { 7 │ unsigned char _1; 8 │ unsigned char _2; 9 │ uint8_t _3; 10 │ __complex__ unsigned char _5; 11 │ 12 │ ;; basic block 2, loop depth 0 13 │ ;;pred: ENTRY 14 │ _5 = .ADD_OVERFLOW (x_4(D), 9); 15 │ _2 = IMAGPART_EXPR <_5>; 16 │ if (_2 != 0) 17 │ goto <bb 4>; [35.00%] 18 │ else 19 │ goto <bb 3>; [65.00%] 20 │ ;;succ: 3 21 │ ;;4 22 │ 23 │ ;; basic block 3, loop depth 0 24 │ ;;pred: 2 25 │ _1 = REALPART_EXPR <_5>; 26 │ ;;succ: 4 27 │ 28 │ ;; basic block 4, loop depth 0 29 │ ;;pred: 2 30 │ ;;3 31 │ # _3 = PHI <255(2), _1(3)> 32 │ return _3; 33 │ ;;succ: EXIT 34 │ 35 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * gimple-match-head.cc (match_cond_with_binary_phi): Add new func impl to match binary phi for true and false arg. 
Signed-off-by: Pan Li --- gcc/gimple-match-head.cc | 118 +++ 1 file changed, 118 insertions(+) diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index 924d3f1e710..6e7a3a0d62e 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -375,3 +375,121 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, bool &wascmp, tree (*va return true; return false; } + +/* + * Return the relevant gcond * of the given phi, as well as the true + * and false TREE args of the phi. Or return NULL. + * + * If matched the gcond *, the output argument TREE true_arg and false_arg + * will be updated to the relevant args of phi. + * + * If failed to match, NULL gcond * will be returned, as well as the output + * arguments will be set to NULL_TREE. + */ + +static inline gcond * +match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg) +{ + *true_arg = *false_arg = NULL_TREE; + + if (gimple_phi_num_args (phi) != 2 + || EDGE_COUNT (gimple_bb (phi)->preds) != 2) +return NULL; + + basic_block pred_0 = EDGE_PRED (gimple_bb (phi), 0)->src; + basic_block pred_1 = EDGE_PRED (gimple_bb (phi), 1)->src; + basic_block cond_block = NULL; + + if ((EDGE_COUNT (pred_0->succs) == 2 && EDGE_COUNT (pred_1->succs) == 1) + || (EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_1->succs) == 2)) +{ + /* For below control flow graph: + *| + *v + * +--+ + * | b0: | + * | def | +-+ + * | ... | | b1: | + * | cond |-->| def | + * +--+ | ... | + *| +-+ + *| | + *v | + * +-+ | + * | b2: | | + * | def |<--+ + * +-+ + */ + basic_block b0 = EDGE_COUNT (pred_0->succs) == 2 ? pred_0 : pred_1; + basic_block b1 = EDGE_COUNT (pred_0->succs) == 1 ? pred_0 : pred_1; + + if (EDGE_COUNT (b1->preds) == 1 && EDGE_PRED (b1, 0)->src == b0) + cond_block = b0; +} + + if (EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_0->preds) == 1 + && EDGE_COUNT (pred_1->succs) == 1 && EDGE_COUNT (pred_1->preds) == 1) +{ + /* For below control flow graph: + *| + *v + * +--+ + * | b0: | + * | ... 
| +-+ + * | cond |-->| b2: | + * +--+ | ... | + *| +-+ + *| | + *v | + * +-+ | + * | b1: | | + * | ... | | + * +-+ | + *| | + *| | + *v | + * +-+ | + * | b3: |<--+ + * | ... | + * +-+ + */ + basic_block b0 = EDGE_PRED (pred_0, 0)->src; + + if (EDGE_COUNT (b0->succs) == 2 && EDGE_PRED (pred_1, 0)->src == b0) + cond_block = b0; +} +
[PATCH v1] RISC-V: Implement SAT_ADD for signed integer vector
From: Pan Li This patch would like to implement the ssadd for vector integer. Aka form 1 of ssadd vector. Form 1: #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ T x = op_1[i]; \ T y = op_2[i]; \ T sum = (UT)x + (UT)y; \ out[i] = (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ }\ } DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) Before this patch: vec_sat_s_add_int64_t_fmt_1: ... vsetvli t1,zero,e64,m1,ta,mu vadd.vv v3,v1,v2 vxor.vv v0,v1,v3 vmslt.vi v0,v0,0 vxor.vv v2,v1,v2 vmsge.vi v2,v2,0 vmand.mm v0,v0,v2 vsra.vx v1,v1,t3 vxor.vv v3,v1,v4,v0.t ... After this patch: vec_sat_s_add_int64_t_fmt_1: ... vsetvli a6,zero,e64,m1,ta,ma vsadd.vv v1,v1,v2 ... The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/autovec.md (ssadd3): Add new pattern for signed integer vector SAT_ADD. * config/riscv/riscv-protos.h (expand_vec_ssadd): Add new func decl for vector ssadd expanding. * config/riscv/riscv-v.cc (expand_vec_ssadd): Add new func impl to expand vector ssadd pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: Add test data for vector ssadd. * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-4.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/autovec.md | 11 + gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 9 + .../riscv/rvv/autovec/binop/vec_sat_data.h| 264 ++ .../riscv/rvv/autovec/binop/vec_sat_s_add-1.c | 18 ++ .../riscv/rvv/autovec/binop/vec_sat_s_add-2.c | 18 ++ .../riscv/rvv/autovec/binop/vec_sat_s_add-3.c | 18 ++ .../riscv/rvv/autovec/binop/vec_sat_s_add-4.c | 18 ++ .../rvv/autovec/binop/vec_sat_s_add-run-1.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-2.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-3.c | 17 ++ .../rvv/autovec/binop/vec_sat_s_add-run-4.c | 17 ++ .../riscv/rvv/autovec/vec_sat_arith.h | 25 ++ 13 files changed, 450 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-4.c diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index a4e108268b4..a53c44659f0 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2684,6 +2684,17 @@ (define_expand "usadd3" } ) +(define_expand "ssadd3" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand") + (match_operand:V_VLSI 2 "register_operand")] + "TARGET_VECTOR" + { +riscv_vector::expand_vec_ssadd (operands[0], operands[1], op
[PATCH v3 5/5] RISC-V: Fix vector SAT_ADD dump check due to middle-end change
From: Pan Li This patch would like to fix the dump check times of vector SAT_ADD. The middle-end change increases the match times from 2 to 4. The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Adjust the dump check times from 2 to 4. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. 
Signed-off-by: Pan Li --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c| 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c| 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c index c525ba97c52..47dd5012cc6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint8_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c index 41372d08e52..df8d5a8d275 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c @@ -15,4 +15,4 @@ */ 
DEF_VEC_SAT_U_ADD_FMT_6(uint16_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c index dddebb54426..f286bd10e4b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint32_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c index ad5162d10a0..307ff36cc35 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c @@ -15,4 +15,4 @@ */ DEF_VEC_SAT_U_ADD_FMT_6(uint64_t) -/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c index 39c20b3cea6..3218962724c 10064
[PATCH v3 2/5] Match: Add interface match_cond_with_binary_phi for true/false arg
From: Pan Li When matching a cond against a 2-arg phi node, we need to figure out which arg of the phi node comes from the true edge of the cond block and which comes from the false edge. This patch adds an interface that performs this lookup and returns the true and false args as TREE values. Some additional handling is needed if one of the args is an INTEGER_CST: an INTEGER_CST arg may have no source block of its own, so its edge source points to the condition block. See line 31 of the example below, where the INTEGER_CST 255 has block 2 as its source. Thus, we need to use the non-INTEGER_CST arg (aka _1) to tell which edge is the true edge and which is the false edge. For example, _1(3) takes block 3 as its source, which is the dest of the false edge of the condition block. 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_add_imm_type_check_uint8_t_fmt_2 (uint8_t x) 6 │ { 7 │ unsigned char _1; 8 │ unsigned char _2; 9 │ uint8_t _3; 10 │ __complex__ unsigned char _5; 11 │ 12 │ ;; basic block 2, loop depth 0 13 │ ;;pred: ENTRY 14 │ _5 = .ADD_OVERFLOW (x_4(D), 9); 15 │ _2 = IMAGPART_EXPR <_5>; 16 │ if (_2 != 0) 17 │ goto <bb 4>; [35.00%] 18 │ else 19 │ goto <bb 3>; [65.00%] 20 │ ;;succ: 3 21 │ ;;4 22 │ 23 │ ;; basic block 3, loop depth 0 24 │ ;;pred: 2 25 │ _1 = REALPART_EXPR <_5>; 26 │ ;;succ: 4 27 │ 28 │ ;; basic block 4, loop depth 0 29 │ ;;pred: 2 30 │ ;;3 31 │ # _3 = PHI <255(2), _1(3)> 32 │ return _3; 33 │ ;;succ: EXIT 34 │ 35 │ } The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * gimple-match-head.cc (match_cond_with_binary_phi): Add new func impl to match binary phi for true and false arg. 
Signed-off-by: Pan Li --- gcc/gimple-match-head.cc | 60 1 file changed, 60 insertions(+) diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index c51728ae742..64f4f28cc72 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -490,3 +490,63 @@ match_control_flow_graph_case_1 (basic_block b3, basic_block *b_out) *b_out = b0; return true; } + +/* + * Return the relevant gcond * of the given phi, as well as the true + * and false TREE args of the phi. Or return NULL. + * + * If matched the gcond *, the output argument TREE true_arg and false_arg + * will be updated to the relevant args of phi. + * + * If failed to match, NULL gcond * will be returned, as well as the output + * arguments will be set to NULL_TREE. + */ + +static inline gcond * +match_cond_with_binary_phi (gphi *phi, tree *true_arg, tree *false_arg) +{ + basic_block cond_block; + *true_arg = *false_arg = NULL_TREE; + + if (gimple_phi_num_args (phi) != 2) +return NULL; + + if (!match_control_flow_graph_case_0 (gimple_bb (phi), &cond_block) + && !match_control_flow_graph_case_1 (gimple_bb (phi), &cond_block)) +return NULL; + + gcond *cond = safe_dyn_cast (*gsi_last_bb (cond_block)); + + if (!cond || EDGE_COUNT (cond_block->succs) != 2) +return NULL; + + tree t0 = gimple_phi_arg_def (phi, 0); + tree t1 = gimple_phi_arg_def (phi, 1); + edge e0 = gimple_phi_arg_edge (phi, 0); + edge e1 = gimple_phi_arg_edge (phi, 1); + + if (TREE_CODE (t0) == INTEGER_CST && TREE_CODE (t1) == INTEGER_CST) +return NULL; + + bool arg_0_cst_p = TREE_CODE (t0) == INTEGER_CST; + edge arg_edge = arg_0_cst_p ? e1 : e0; + tree arg = arg_0_cst_p ? t1 : t0; + tree other_arg = arg_0_cst_p ? t0 : t1; + + edge cond_e0 = EDGE_SUCC (cond_block, 0); + edge cond_e1 = EDGE_SUCC (cond_block, 1); + edge matched_edge = arg_edge->src == cond_e0->dest ? 
cond_e0 : cond_e1; + + if (matched_edge->flags & EDGE_TRUE_VALUE) +{ + *true_arg = arg; + *false_arg = other_arg; +} + else +{ + *false_arg = arg; + *true_arg = other_arg; +} + + return cond; +} -- 2.43.0
[PATCH v3 4/5] Match: Support form 3 for scalar signed integer .SAT_ADD
From: Pan Li This patch adds support for form 3 of the scalar signed integer .SAT_ADD, as in the example below: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can see the difference before and after this patch if the backend implements the ssadd<m>3 pattern, as shown below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto <bb 4>; [50.00%] 22 │ else 23 │ goto <bb 3>; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto <bb 5>; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _3; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call] 12 │ return _3; 13 │ ;;succ: EXIT 14 │ 15 │ } The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add the form 3 of signed .SAT_ADD matching. 
Signed-off-by: Pan Li --- gcc/match.pd | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 4cef965c9c7..167b1b106dd 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3237,6 +3237,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type +/* Signed saturation add, case 4: + Z = .ADD_OVERFLOW (X, Y) + SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v3 3/5] Genmatch: Refine the gen_phi_on_cond by match_cond_with_binary_phi
From: Pan Li This patch leverages match_cond_with_binary_phi to match the phi on cond and to get the true/false args if matched. This greatly simplifies the implementation of gen_phi_on_cond. Before this patch: basic_block _b1 = gimple_bb (_a1); if (gimple_phi_num_args (_a1) == 2) { basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1; basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1; gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1)); if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 && EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_PRED (_other_db_1, 0)->src == _db_1) { tree _cond_lhs_1 = gimple_cond_lhs (_ct_1); tree _cond_rhs_1 = gimple_cond_rhs (_ct_1); tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE; tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1); tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0); ... After this patch: basic_block _b1 = gimple_bb (_a1); tree _p1, _p2; gcond *_cond_1 = match_cond_with_binary_phi (_a1, &_p1, &_p2); if (_cond_1 && _p1 && _p2) { tree _cond_lhs_1 = gimple_cond_lhs (_cond_1); tree _cond_rhs_1 = gimple_cond_rhs (_cond_1); tree _p0 = build2 (gimple_cond_code (_cond_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1); ... The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * genmatch.cc (dt_operand::gen_phi_on_cond): Leverage the match_cond_with_binary_phi API to get cond gimple, true and false TREE arg. 
Signed-off-by: Pan Li --- gcc/genmatch.cc | 67 +++-- 1 file changed, 15 insertions(+), 52 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index a56bd90cb2c..e3d2ecc6266 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -3516,79 +3516,42 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) void dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) { - fprintf_indent (f, indent, -"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); - - fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); + char opname_0[20]; + char opname_1[20]; + char opname_2[20]; - indent += 2; - fprintf_indent (f, indent, "{\n"); - indent += 2; + gen_opname (opname_0, 0); + gen_opname (opname_1, 1); + gen_opname (opname_2, 2); fprintf_indent (f, indent, -"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _db_%d = safe_dyn_cast (*gsi_last_bb (_pb_0_%d)) ? " -"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); +"basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); + fprintf_indent (f, indent, "tree %s, %s;\n", opname_1, opname_2); fprintf_indent (f, indent, -"basic_block _other_db_%d = safe_dyn_cast " -"(*gsi_last_bb (_pb_0_%d)) ? 
_pb_1_%d : _pb_0_%d;\n", -depth, depth, depth, depth); +"gcond *_cond_%d = match_cond_with_binary_phi (_a%d, &%s, &%s);\n", +depth, depth, opname_1, opname_2); - fprintf_indent (f, indent, -"gcond *_ct_%d = safe_dyn_cast (*gsi_last_bb (_db_%d));\n", -depth, depth); - fprintf_indent (f, indent, "if (_ct_%d" -" && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); - fprintf_indent (f, indent, -" && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); - fprintf_indent (f, indent, -" && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); + fprintf_indent (f, indent, "if (_cond_%d && %s && %s)\n", +depth, opname_1, opname_2); indent += 2; fprintf_indent (f, indent, "{\n"); indent += 2; fprintf_indent (f, indent, -"tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); +"tree _cond_lhs_%d = gimple_cond_lhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); - - char opname_0[20]; - char opname_1[20]; - char opname_2[20]; - gen_opname (opname_0, 0); - +"tree _cond_rhs_%d = gimple_cond_rhs (_cond_%d);\n", depth, depth); fprintf_indent (f, indent, -"tree %s = build2 (gimple_cond_code (_ct_%d), " +"tree %s = build2 (gimple_cond_code (_cond_%d), " "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", opname_0, depth, depth, depth); - fprintf_indent (f, indent, -"bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" -" & EDGE_TRUE_VALUE;\n", depth, depth); - - ge
[PATCH v3 1/5] Genmatch: Add control flow graph match for case 0 and case 1
From: Pan Li The gen_phi_on_cond can only support below control flow for cond from day 1. Aka: +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | PHI |<--+ +-+ Unfortunately, there will be more scenarios of control flow on PHI. For example as below: T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) With expanded RTL like below. 3 │ 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto ; [50.00%] 22 │ else 23 │ goto ; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto ; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } The above code will have below control flow which is not supported by the gen_phi_on_cond. +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | def | | | ... | | +-+ | | | | | v | +-+ | | PHI |<--+ +-+ This patch would like to add support above control flow matching for the gen_phi_on_cond. The below testsuites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. 
gcc/ChangeLog: * gimple-match-head.cc (match_control_flow_graph_case_0): Add new func impl to match case 0 of cfg. (match_control_flow_graph_case_1): Ditto but for case 1. Signed-off-by: Pan Li --- gcc/gimple-match-head.cc | 115 +++ 1 file changed, 115 insertions(+) diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index 924d3f1e710..c51728ae742 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -375,3 +375,118 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, bool &wascmp, tree (*va return true; return false; } + +/* + * Return TRUE if the cfg matches the below layout by the given b2 in + * the first argument. Or return FALSE. + * + * If return TRUE, the output argument b_out will be updated to the b0 + * block as below example. + * + * If return FALSE, the output argument b_out will be NULL_BLOCK. + * + *| + *| + *v + * +--+ + * | b0: | + * | def | +-+ + * | ... | | b1: | + * | cond |-->| def | + * +--+ | ... | + *| +-+ + *| | + *v | + * +-+ | + * | b2: | | + * | def |<--+ + * +-+ + */ +static inline bool +match_control_flow_graph_case_0 (basic_block b2, basic_block *b_out) +{ + *b_out = NULL; + + if (EDGE_COUNT (b2->preds) != 2) +return false; + + basic_block pred_0 = EDGE_PRED (b2, 0)->src; + basic_block pred_1 = EDGE_PRED (b2, 1)->src; + + if (pred_0 == NULL || pred_1 == NULL) +return false; + + if (!(EDGE_COUNT (pred_0->succs) == 2 && EDGE_COUNT (pred_1->succs) == 1) + && !(EDGE_COUNT (pred_0->succs) == 1 && EDGE_COUNT (pred_1->succs) == 2)) +return false; + + basic_block b0 = EDGE_COUNT (pred_0->succs) == 2 ? pred_0 : pred_1; + basic_block b1 = EDGE_COUNT (pred_0->succs) == 1 ? pred_0 : pred_1; + + if (EDGE_COUNT (b1->preds) != 1 || EDGE_PRED (b1, 0)->src != b0) +return false; + + *b_out = b0; + return true; +} + +/* + * Return TRUE if the cfg matches the below layout by the given b3 in + * the first argument. Or return FALSE. + * + * If return TRUE, the output argument b_out will be updated to the b0 +
[PATCH v1] RISC-V: Fix asm check for Vector SAT_* due to middle-end change
From: Pan Li The middle-end change makes the effect on the layout of the assembly for vector SAT_*. This patch would like to fix it and make it robust. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust asm check and make it robust. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto. * gcc.target/riscv/rvv/autovec/b
[PATCH v2 2/2] Match: Support form 3 for scalar signed integer .SAT_ADD
From: Pan Li This patch adds support for form 3 of the scalar signed integer .SAT_ADD, as in the example below: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can see the difference before and after this patch if the backend implements the ssadd<m>3 pattern, as shown below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto <bb 4>; [50.00%] 22 │ else 23 │ goto <bb 3>; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto <bb 5>; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _3; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call] 12 │ return _3; 13 │ ;;succ: EXIT 14 │ 15 │ } The test suites below passed for this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add the form 3 of signed .SAT_ADD matching. 
Signed-off-by: Pan Li --- gcc/match.pd | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 621306213e4..1d478d42ed5 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3207,6 +3207,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 3: + Z = .ADD_OVERFLOW (X, Y) + SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v2 1/2] Genmatch: Support control flow graph case 1 for phi on condition
From: Pan Li The gen_phi_on_cond can only support below control flow for cond from day 1. Aka: +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | PHI |<--+ +-+ Unfortunately, there will be more scenarios of control flow on PHI. For example as below: T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) With expanded RTL like below. 3 │ 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto ; [50.00%] 22 │ else 23 │ goto ; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto ; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } The above code will have below control flow which is not supported by the gen_phi_on_cond. +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | def | | | ... | | +-+ | | | | | v | +-+ | | PHI |<--+ +-+ This patch would like to add support above control flow for the gen_phi_on_cond. The generated match code looks like below. 
Before this patch: basic_block _b1 = gimple_bb (_a1); if (gimple_phi_num_args (_a1) == 2) { basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; basic_block _db_1 = safe_dyn_cast (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1; basic_block _other_db_1 = safe_dyn_cast (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1; gcond *_ct_1 = safe_dyn_cast (*gsi_last_bb (_db_1)); if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 && EDGE_COUNT (_other_db_1->succs) == 1 && EDGE_PRED (_other_db_1, 0)->src == _db_1) { ... After this patch: basic_block _b1 = gimple_bb (_a1); basic_block _b_cond_1; if (gimple_phi_num_args (_a1) == 2 && (control_flow_graph_case_0_match (_b1, &_b_cond_1) || control_flow_graph_case_1_match (_b1, &_b_cond_1))) { ... The below testsuites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * genmatch.cc (dt_operand::gen_phi_on_cond): Add support control flow graph case 1 for gen phi on condition. * gimple-match-head.cc (control_flow_graph_case_0_match): Add new func impl to match case 0 of cfg. (control_flow_graph_case_1_match): Ditto but for case 1. 
Signed-off-by: Pan Li --- gcc/genmatch.cc | 37 + gcc/gimple-match-head.cc | 115 +++ 2 files changed, 130 insertions(+), 22 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index a56bd90cb2c..e0ec1c0e928 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -3518,43 +3518,36 @@ dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) { fprintf_indent (f, indent, "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); + fprintf_indent (f, indent, "basic_block _b_cond_%d;\n", depth); - fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); + fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2\n", depth); - indent += 2; - fprintf_indent (f, indent, "{\n"); indent += 2; fprintf_indent (f, indent, -"basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); - fprintf_indent (f, indent, -"basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); - fprintf_indent (f
[PATCH v1] RISC-V: Fix SAT_* dump check failure due to middle-end change.
From: Pan Li Some middle-end changes may affect the number of times .SAT_* appears in the dump. Thus, refine the dump check for SAT_* from scan-times to a plain scan, as we only care whether .SAT_* exists or not. There will be another PATCH to perform a similar refinement; this PATCH only fixes the failing test cases. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Refine the dump check from times to exist. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto. 
Signed-off-by: Pan Li --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c | 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c | 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c| 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c| 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c| 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c| 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c | 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c | 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c | 2 +- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c | 2 +- .../gcc.tar
[PATCH v1 2/2] Match: Support form 3 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 3 of the scalar signed integer .SAT_ADD. Aka below example: Form 3: #define DEF_SAT_S_ADD_FMT_3(T, UT, MIN, MAX) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto ; [50.00%] 22 │ else 23 │ goto ; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto ; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _3; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call] 12 │ return _3; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the form 3 of signed .SAT_ADD matching. 
Signed-off-by: Pan Li --- gcc/match.pd | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 1372f2ba377..1218abcd01a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3222,6 +3222,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 3: + Z = .ADD_OVERFLOW (X, Y) + SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v1 1/2] Genmatch: Support new flow for phi on condition
From: Pan Li The gen_phi_on_cond can only support below control flow for cond from day 1. Aka: +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | PHI |<--+ +-+ Unfortunately, there will be more scenarios of control flow on PHI. For example as below: T __attribute__((noinline))\ sat_s_add_##T##_fmt_3 (T x, T y) \ { \ T sum; \ bool overflow = __builtin_add_overflow (x, y, &sum); \ return overflow ? x < 0 ? MIN : MAX : sum; \ } DEF_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX) With expanded RTL like below. 3 │ 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_3 (int8_t x, int8_t y) 6 │ { 7 │ signed char _1; 8 │ signed char _2; 9 │ int8_t _3; 10 │ __complex__ signed char _6; 11 │ _Bool _8; 12 │ signed char _9; 13 │ signed char _10; 14 │ signed char _11; 15 │ 16 │ ;; basic block 2, loop depth 0 17 │ ;;pred: ENTRY 18 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 19 │ _2 = IMAGPART_EXPR <_6>; 20 │ if (_2 != 0) 21 │ goto ; [50.00%] 22 │ else 23 │ goto ; [50.00%] 24 │ ;;succ: 4 25 │ ;;3 26 │ 27 │ ;; basic block 3, loop depth 0 28 │ ;;pred: 2 29 │ _1 = REALPART_EXPR <_6>; 30 │ goto ; [100.00%] 31 │ ;;succ: 5 32 │ 33 │ ;; basic block 4, loop depth 0 34 │ ;;pred: 2 35 │ _8 = x_4(D) < 0; 36 │ _9 = (signed char) _8; 37 │ _10 = -_9; 38 │ _11 = _10 ^ 127; 39 │ ;;succ: 5 40 │ 41 │ ;; basic block 5, loop depth 0 42 │ ;;pred: 3 43 │ ;;4 44 │ # _3 = PHI <_1(3), _11(4)> 45 │ return _3; 46 │ ;;succ: EXIT 47 │ 48 │ } The above code will have below control flow which is not supported by the gen_phi_on_cond. +--+ | def | | ... | +-+ | cond |-->| def | +--+ | ... | | +-+ | | v | +-+ | | def | | | ... | | +-+ | | | | | v | +-+ | | PHI |<--+ +-+ This patch would like to add support above control flow for the gen_phi_on_cond. The below testsuites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. 
gcc/ChangeLog: * genmatch.cc (dt_operand::gen_phi_on_cond): Add support for a new control flow when gen phi on condition. Signed-off-by: Pan Li --- gcc/genmatch.cc | 85 +++-- 1 file changed, 76 insertions(+), 9 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index a56bd90cb2c..f538df1be62 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -3529,28 +3529,95 @@ dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); fprintf_indent (f, indent, "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); + fprintf_indent (f, indent, -"basic_block _db_%d = safe_dyn_cast (*gsi_last_bb (_pb_0_%d)) ? " -"_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); +"gcond *_ct_0_%d = safe_dyn_cast (*gsi_last_bb (_pb_0_%d));\n", +depth, depth); fprintf_indent (f, indent, -"basic_block _other_db_%d = safe_dyn_cast " -"(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n", +"gcond *_ct_1_%d = safe_dyn_cast (*gsi_last_bb (_pb_1_%d));\n", +depth, depth); + fprintf_indent (f, indent, +"gcond *_ct_a_%d = _ct_0_%d ? _ct_0_%d : _ct_1_%d;\n", +depth, depth, depth, depth); + fprintf_indent (f, indent, +"basic_block _db_%d = _ct_0_%d ? _pb_0_%d : _pb_1_%d;\n", +depth, depth, depth, depth); + fprintf_indent (f, indent, +"basic_block _other_db_%d = _ct_0_%d ? _pb_1_%d : _pb_0_%d;\n", depth, depth, depth, depth); fprintf_indent (f, indent, -"gcond *_ct_%d = safe_dyn_cast (*gsi_last_bb (_db_%d));\n", -depth, depth); - fprintf_indent (f, indent, "if (_ct_%d" +"edge _e_00_%d = _pb_0_%d->preds ? EDGE_PRED (_pb_0_%d, 0) : NULL;\n", +depth, depth, depth); + fprintf_indent (f, indent, +"basic_block _pb_00_%d = _e_00_%d ? _e_00_%d->src : NULL;\n", +depth, depth, depth); + fprintf_indent (f, indent, +"gcond *_ct_b_%d = _pb_00_%d ? " +"safe_dyn_cast (*gsi_last_bb (_pb_00_%d)) : NULL;\n", +depth, depth, depth); + + /* Case 1 flow for PHI. + * +--+ + * | def | + * | ... | +-+ + * | cond |-->| def | + * +--+ | ... 
| + *
[PATCH v1] Match: Support form 2 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 2 of the scalar signed integer .SAT_ADD. Aka below example: Form 2: #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_2 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ \ if ((x ^ y) < 0 || (sum ^ x) >= 0) \ return sum; \ \ return x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t sum; 8 │ unsigned char x.0_1; 9 │ unsigned char y.1_2; 10 │ unsigned char _3; 11 │ signed char _4; 12 │ signed char _5; 13 │ int8_t _6; 14 │ _Bool _11; 15 │ signed char _12; 16 │ signed char _13; 17 │ signed char _14; 18 │ signed char _22; 19 │ signed char _23; 20 │ 21 │ ;; basic block 2, loop depth 0 22 │ ;;pred: ENTRY 23 │ x.0_1 = (unsigned char) x_7(D); 24 │ y.1_2 = (unsigned char) y_8(D); 25 │ _3 = x.0_1 + y.1_2; 26 │ sum_9 = (int8_t) _3; 27 │ _4 = x_7(D) ^ y_8(D); 28 │ _5 = x_7(D) ^ sum_9; 29 │ _23 = ~_4; 30 │ _22 = _5 & _23; 31 │ if (_22 >= 0) 32 │ goto ; [42.57%] 33 │ else 34 │ goto ; [57.43%] 35 │ ;;succ: 4 36 │ ;;3 37 │ 38 │ ;; basic block 3, loop depth 0 39 │ ;;pred: 2 40 │ _11 = x_7(D) < 0; 41 │ _12 = (signed char) _11; 42 │ _13 = -_12; 43 │ _14 = _13 ^ 127; 44 │ ;;succ: 4 45 │ 46 │ ;; basic block 4, loop depth 0 47 │ ;;pred: 2 48 │ ;;3 49 │ # _6 = PHI 50 │ return _6; 51 │ ;;succ: EXIT 52 │ 53 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_ADD (x_7(D), y_8(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. 
gcc/ChangeLog: * match.pd: Add the form 2 of signed .SAT_ADD matching. Signed-off-by: Pan Li --- gcc/match.pd | 15 +++ 1 file changed, 15 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 4298e89dad6..1372f2ba377 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3207,6 +3207,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 2: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ sum) & !(X ^ Y) >= 0 ? sum : (-(T)(X < 0) ^ MAX); + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ge (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + @2 + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) -- 2.43.0
[PATCH v1] RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD
From: Pan Li This patch would like to allow the IMM operand of the unsigned scalar .SAT_ADD. Like the operand 0, the operand 1 of .SAT_ADD will be zero extended to Xmode before underlying code generation. The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_usadd): Zero extend the second operand of usadd as the first operand does. * config/riscv/riscv.md (usadd3): Allow imm operand for scalar usadd pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add-11.c: Make asm check robust. * gcc.target/riscv/sat_u_add-15.c: Ditto. * gcc.target/riscv/sat_u_add-19.c: Ditto. * gcc.target/riscv/sat_u_add-23.c: Ditto. * gcc.target/riscv/sat_u_add-3.c: Ditto. * gcc.target/riscv/sat_u_add-7.c: Ditto. Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 2 +- gcc/config/riscv/riscv.md | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-3.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-7.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d03e51f3a68..4061d2372b4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11970,7 +11970,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) rtx xmode_sum = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode); - rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode); rtx xmode_dest = gen_reg_rtx (Xmode); /* Step-1: sum = x + y */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 3289ed2155a..4b0be43f436 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4358,8 +4358,8 @@ (define_insn_and_split "" (define_expand 
"usadd3" [(match_operand:ANYI 0 "register_operand") - (match_operand:ANYI 1 "register_operand") - (match_operand:ANYI 2 "register_operand")] + (match_operand:ANYI 1 "reg_or_int_operand") + (match_operand:ANYI 2 "reg_or_int_operand")] "" { riscv_expand_usadd (operands[0], operands[1], operands[2]); diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c index e248aeafa8e..bd830ececad 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_3: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c index bb8b991a84e..de615a6225e 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_4: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c index 7e4ae12f2f5..2b793e2f8fd 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_5: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** 
sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c index 49bbb74a401..5de086e1138 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_6: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c index cd15dc96450..bd7ccb2a8c7 100644 --- a/gcc/testsu
[PATCH v1 2/2] Match: Add int type fits check for form 2 of .SAT_SUB imm operand
From: Pan Li This patch adds a strict check for the imm operand of .SAT_SUB matching. Previously there was no type checking for the imm operand, which could result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an integer constant that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, 12); uint8_t sum = .SAT_SUB (a, 12u); uint8_t sum = .SAT_SUB (a, 126u); uint8_t sum = .SAT_SUB (a, 128u); uint8_t sum = .SAT_SUB (a, 228); uint8_t sum = .SAT_SUB (a, 223u); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, -1); uint8_t sum = .SAT_SUB (a, 256u); uint8_t sum = .SAT_SUB (a, 257); The below test suites are passed for this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test. 
Signed-off-by: Pan Li --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++ .../riscv/sat_u_add_imm_type_check-57.c | 18 + .../riscv/sat_u_add_imm_type_check-58.c | 27 +++ .../riscv/sat_u_add_imm_type_check-59.c | 18 + .../riscv/sat_u_add_imm_type_check-60.c | 27 +++ 6 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-59.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-60.c diff --git a/gcc/match.pd b/gcc/match.pd index 45e0cc4a54f..6c54f0502eb 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (plus (max @0 INTEGER_CST@1) INTEGER_CST@2) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@1, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 75f48b4b760..4d11b6dcf3b 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -281,6 +281,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ return IMM > y ? IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\ +{ \ + return x >= IMM ? x - IMM : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\ +{ \ + return x > IMM ? 
x - IMM : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c new file mode 100644 index 000..1b193bcfb26 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c new file mode 100644 ind
[PATCH v1 1/2] Match: Add int type fits check for form 1 of .SAT_SUB imm operand
From: Pan Li This patch adds a strict check for the imm operand of .SAT_SUB matching. Previously there was no type checking for the imm operand, which could result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an integer constant that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (12, a); uint8_t sum = .SAT_SUB (12u, a); uint8_t sum = .SAT_SUB (126u, a); uint8_t sum = .SAT_SUB (128u, a); uint8_t sum = .SAT_SUB (228, a); uint8_t sum = .SAT_SUB (223u, a); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (-1, a); uint8_t sum = .SAT_SUB (256u, a); uint8_t sum = .SAT_SUB (257, a); The below test suites are passed for this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test. 
Signed-off-by: Pan Li --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++ .../riscv/sat_u_add_imm_type_check-53.c | 18 + .../riscv/sat_u_add_imm_type_check-54.c | 27 +++ .../riscv/sat_u_add_imm_type_check-55.c | 18 + .../riscv/sat_u_add_imm_type_check-56.c | 27 +++ 6 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-55.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-56.c diff --git a/gcc/match.pd b/gcc/match.pd index be211535a49..45e0cc4a54f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@0, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index a899979904b..75f48b4b760 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -267,6 +267,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\ +{ \ + return IMM >= y ? IMM - y : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ +{ \ + return IMM > y ? 
IMM - y : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c new file mode 100644 index 000..c959eeb0d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c b/gcc/testsuite/gc
[PATCH v1] Vect: Support form 1 of vector signed integer .SAT_ADD
From: Pan Li This patch would like to support the vector signed ssadd pattern for the RISC-V backend. Aka Form 1: #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ { \ T sum = (UT)x[i] + (UT)y[i]; \ out[i] = (x[i] ^ y[i]) < 0 \ ? sum \ : (sum ^ x[i]) >= 0\ ? sum\ : x[i] < 0 ? MIN : MAX; \ } \ } DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) If the backend implemented the vector mode of ssadd, we will see IR diff similar as below: Before this patch: 108 │ _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]); 109 │ ivtmp_77 = _114 * 8; 110 │ vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ... }, _114, 0); 111 │ vect__5.10_81 = VIEW_CONVERT_EXPR(vect__4.9_80); 112 │ vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ... }, _114, 0); 113 │ vect__8.14_86 = VIEW_CONVERT_EXPR(vect__7.13_85); 114 │ vect__9.15_87 = vect__5.10_81 + vect__8.14_86; 115 │ vect_sum_20.16_88 = VIEW_CONVERT_EXPR(vect__9.15_87); 116 │ vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85; 117 │ vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88; 118 │ mask__46.19_92 = vect__10.17_89 >= { 0, ... }; 119 │ _36 = vect__4.9_80 >> 63; 120 │ mask__44.26_104 = vect__11.18_90 < { 0, ... }; 121 │ mask__43.27_105 = mask__46.19_92 & mask__44.26_104; 122 │ _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, ... }, vect_sum_20.16_88); 123 │ .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... 
}, _114, 0, _115); 124 │ vectp_x.7_79 = vectp_x.7_78 + ivtmp_77; 125 │ vectp_y.11_84 = vectp_y.11_83 + ivtmp_77; 126 │ vectp_out.29_109 = vectp_out.29_108 + ivtmp_77; 127 │ ivtmp_113 = ivtmp_112 - _114; After this patch: 94 │ # vectp_x.7_82 = PHI 95 │ # vectp_y.10_86 = PHI 96 │ # vectp_out.14_91 = PHI 97 │ # ivtmp_95 = PHI 98 │ _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]); 99 │ ivtmp_81 = _97 * 8; 100 │ vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ... }, _97, 0); 101 │ vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ... }, _97, 0); 102 │ vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88); 103 │ .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, vect_patt_40.13_89); 104 │ vectp_x.7_83 = vectp_x.7_82 + ivtmp_81; 105 │ vectp_y.10_87 = vectp_y.10_86 + ivtmp_81; 106 │ vectp_out.14_92 = vectp_out.14_91 + ivtmp_81; 107 │ ivtmp_96 = ivtmp_95 - _97; The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * match.pd: Add case 2 for the signed .SAT_ADD consumed by vect pattern. * tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new matching func decl for signed .SAT_ADD. (vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match. Signed-off-by: Pan Li --- gcc/match.pd | 17 + gcc/tree-vect-patterns.cc | 5 - 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index be211535a49..578c9dd5b77 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3207,6 +3207,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 2: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ Y) < 0 && (X ^ sum) >= 0 ? (-(T)(X < 0) ^ MAX) : sum; + + The T and UT are type pair like T=int8_t, UT=uint8_t. 
*/ +(match (signed_integer_sat_add @0 @1) + (cond^ (bit_and:c (lt (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 + integer_zerop) + (ge (bit_xor:c @0 @1) integer_zerop)) + (bit_xor:c (nop_convert (negate (nop_convert (convert + (lt @0 integer_zerop) + max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
[PATCH v1] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64
From: Pan Li Previously, we had special handling for both the unsigned .SAT_ADD and the unsigned .SAT_SUB. The two helper functions are similar: both take care of zero-extending SImode operands on RV64. Thus refactor these two helper functions into one to avoid code duplication. The below test suite is passed for this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge the zero_extend handling from func riscv_gen_unsigned_xmode_reg. (riscv_gen_unsigned_xmode_reg): Remove. (riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx instead of riscv_gen_unsigned_xmode_reg. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_sub-11.c: Adjust asm check. * gcc.target/riscv/sat_u_sub-15.c: Ditto. * gcc.target/riscv/sat_u_sub-19.c: Ditto. * gcc.target/riscv/sat_u_sub-23.c: Ditto. * gcc.target/riscv/sat_u_sub-27.c: Ditto. * gcc.target/riscv/sat_u_sub-3.c: Ditto. * gcc.target/riscv/sat_u_sub-31.c: Ditto. * gcc.target/riscv/sat_u_sub-35.c: Ditto. * gcc.target/riscv/sat_u_sub-39.c: Ditto. * gcc.target/riscv/sat_u_sub-43.c: Ditto. * gcc.target/riscv/sat_u_sub-47.c: Ditto. * gcc.target/riscv/sat_u_sub-7.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 99 +-- gcc/testsuite/gcc.target/riscv/sat_u_sub-11.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-15.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-19.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-23.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-27.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-3.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-31.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-35.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-39.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-43.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-47.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-7.c | 4 + .../gcc.target/riscv/sat_u_sub_imm-11.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-11_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-11_2.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-15.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-15_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-15_2.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-3.c| 2 + .../gcc.target/riscv/sat_u_sub_imm-3_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-3_2.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-7.c| 2 + .../gcc.target/riscv/sat_u_sub_imm-7_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-7_2.c | 2 + 25 files changed, 118 insertions(+), 53 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 496dd177fe7..75b37b53244 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } -/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. - The rtx x will be zero extended to Xmode if the mode is HI/QImode, and - the new zero extended Xmode rtx will be returned. - Or the gen_lowpart rtx of Xmode will be returned. */ +/* Generate a REG rtx of Xmode from the given rtx and mode. + The rtx x can be REG (QI/HI/SI/DI) or const_int. 
+ The machine_mode mode is the original mode from define pattern. + + If rtx is REG and Xmode, the RTX x will be returned directly. + + If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be + returned. + + If rtx is const_int, a new REG rtx will be created to hold the value of + const_int and then returned. + + According to the gccint doc, the constants generated for modes with fewer + bits than in HOST_WIDE_INT must be sign extended to full width. Thus there + will be two cases here, take QImode as example. + + For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple + mov from const_int to the new REG rtx is good enough here. + + For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand. + Aka 0xfffe in Xmode of RV64 but we actually need 0xfe in Xmode + of RV64. So we need to cleanup the highest 56 bits of the new REG
[PATCH v1 2/2] RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM
From: Pan Li This patch would like to add test cases for the unsigned vector .SAT_ADD when one of the operands is IMM. Form 4: #define DEF_VEC_SAT_U_ADD_IMM_FMT_4(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_4 (T *out, T *in, unsigned limit) \ { \ unsigned i; \ T ret; \ for (i = 0; i < limit; i++) \ { \ out[i] = __builtin_add_overflow (in[i], IMM, &ret) == 0 ? ret : -1; \ } \ } DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint64_t, 123) The tests below passed for this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/binop/vec_sat_u_add_imm-13.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-14.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-15.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-16.c | 14 ++ .../autovec/binop/vec_sat_u_add_imm-run-13.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-14.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-15.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-16.c | 28 +++ .../riscv/rvv/autovec/vec_sat_arith.h | 20 + 9 files changed, 188 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c new file mode 100644 index 000..a9439dff39f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm9u_uint8_t_fmt_4: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*9 +** ... 
+*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint8_t, 9u) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c new file mode 100644 index 000..dbe47497599 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm15_uint16_t_fmt_4: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15 +** ... +*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint16_t, 15) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c new file mode 100644 index 000..0ac2e1b2942 --- /dev/null +++
[PATCH v1 1/2] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM
From: Pan Li This patch would like to add test cases for the unsigned vector .SAT_ADD when one of the operands is IMM. Form 3: #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ T ret; \ for (i = 0; i < limit; i++)\ {\ out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \ }\ } DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123) The tests below passed for this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/binop/vec_sat_u_add_imm-10.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-11.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-12.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-9.c | 14 ++ .../autovec/binop/vec_sat_u_add_imm-run-10.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-11.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-12.c | 28 +++ .../autovec/binop/vec_sat_u_add_imm-run-9.c | 28 +++ 8 files changed, 168 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c new file mode 100644 index 000..b6b605ac615 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm15_uint16_t_fmt_3: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15 +** ... 
+*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint16_t, 15) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c new file mode 100644 index 000..6da86a1abe1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm33u_uint32_t_fmt_3: +** ... +** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint32_t, 33u) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c new file mode 100644 index 000..b6ff5a6d5d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-
[PATCH v2 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUNC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } QUAD: DEF_SAT_U_TRUNC_FMT_2 (uint16_t, uint64_t) DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-10.c: New test. * gcc.target/riscv/sat_u_trunc-11.c: New test. * gcc.target/riscv/sat_u_trunc-12.c: New test. * gcc.target/riscv/sat_u_trunc-run-10.c: New test. * gcc.target/riscv/sat_u_trunc-run-11.c: New test. * gcc.target/riscv/sat_u_trunc-run-12.c: New test. Signed-off-by: Pan Li --- .../gcc.target/riscv/sat_u_trunc-10.c | 17 .../gcc.target/riscv/sat_u_trunc-11.c | 17 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++ 6 files changed, 102 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c new file mode 100644 index 000..5ea8e613901 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint32_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c new file mode 100644 index 000..3b45e2af9ce --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c new file mode 100644 index 000..7ea2c93a301 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c new file mode 100644 index 000..2281610f335 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUNC_FMT_2_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define
[PATCH v2 2/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 3. Aka: Form 3: #define DEF_SAT_U_TRUNC_FMT_3(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } QUAD: DEF_SAT_U_TRUNC_FMT_3 (uint16_t, uint64_t) DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-16.c: New test. * gcc.target/riscv/sat_u_trunc-17.c: New test. * gcc.target/riscv/sat_u_trunc-18.c: New test. * gcc.target/riscv/sat_u_trunc-run-16.c: New test. * gcc.target/riscv/sat_u_trunc-run-17.c: New test. * gcc.target/riscv/sat_u_trunc-run-18.c: New test. Signed-off-by: Pan Li --- .../gcc.target/riscv/sat_u_trunc-16.c | 17 .../gcc.target/riscv/sat_u_trunc-17.c | 17 .../gcc.target/riscv/sat_u_trunc-18.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-16.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-17.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-18.c | 16 +++ 6 files changed, 102 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c new file mode 100644 index 000..f91da58c0ba --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint32_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c new file mode 100644 index 000..9813e1f79b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c new file mode 100644 index 000..eb799849f73 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c new file mode 100644 index 000..20ceda6852e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUNC_FMT_3_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define
[PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD
From: Pan Li This patch would like to support the scalar signed ssadd pattern for the RISC-V backend. Aka Form 1: #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) Before this patch: 10 │ sat_s_add_int64_t_fmt_1: 11 │ mv a5,a0 12 │ add a0,a0,a1 13 │ xor a1,a5,a1 14 │ not a1,a1 15 │ xor a4,a5,a0 16 │ and a1,a1,a4 17 │ blt a1,zero,.L5 18 │ ret 19 │ .L5: 20 │ srai a5,a5,63 21 │ li a0,-1 22 │ srli a0,a0,1 23 │ xor a0,a5,a0 24 │ ret After this patch: 10 │ sat_s_add_int64_t_fmt_1: 11 │ add a2,a0,a1 12 │ xor a1,a0,a1 13 │ xor a5,a0,a2 14 │ srli a5,a5,63 15 │ srli a1,a1,63 16 │ xori a1,a1,1 17 │ and a5,a5,a1 18 │ srai a4,a0,63 19 │ li a3,-1 20 │ srli a3,a3,1 21 │ xor a3,a3,a4 22 │ neg a4,a5 23 │ and a3,a3,a4 24 │ addi a5,a5,-1 25 │ and a0,a2,a5 26 │ or a0,a0,a3 27 │ ret The test suites below passed for this patch: 1. The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func decl for expanding ssadd. * config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func impl to gen the max int rtx. (riscv_expand_ssadd): Add new func impl to expand the ssadd. * config/riscv/riscv.md (ssadd<mode>3): Add new pattern for signed integer .SAT_ADD. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_arith_data.h: Add test data. * gcc.target/riscv/sat_s_add-1.c: New test. * gcc.target/riscv/sat_s_add-2.c: New test. * gcc.target/riscv/sat_s_add-3.c: New test. * gcc.target/riscv/sat_s_add-4.c: New test. * gcc.target/riscv/sat_s_add-run-1.c: New test. * gcc.target/riscv/sat_s_add-run-2.c: New test. * gcc.target/riscv/sat_s_add-run-3.c: New test. * gcc.target/riscv/sat_s_add-run-4.c: New test. * gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv.cc | 90 +++ gcc/config/riscv/riscv.md | 11 +++ gcc/testsuite/gcc.target/riscv/sat_arith.h| 17 .../gcc.target/riscv/sat_arith_data.h | 85 ++ gcc/testsuite/gcc.target/riscv/sat_s_add-1.c | 30 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-2.c | 32 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-3.c | 31 +++ gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 30 +++ .../gcc.target/riscv/sat_s_add-run-1.c| 16 .../gcc.target/riscv/sat_s_add-run-2.c| 16 .../gcc.target/riscv/sat_s_add-run-3.c| 16 .../gcc.target/riscv/sat_s_add-run-4.c| 16 .../riscv/scalar_sat_binary_run_xxx.h | 26 ++ 14 files changed, 417 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 926899ccad6..3358e3887b9 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -134,6 +134,7 @@ extern bool riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int); extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); extern void riscv_expand_usadd (rtx, rtx, rtx); +extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e9b1b9bc3ad..e2b28a278f6 100644 --- a/gcc/config/riscv/riscv.cc +++ 
b/gcc/config/riscv/riscv.cc @@ -11947,6 +11947,96 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
[PATCH v3] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD
From: Pan Li The .SAT_ADD has 2 operands, and one of them may be an INTEGER_CST. For example, _1 = .SAT_ADD (_2, 9) comes from the sample code below. Form 3: #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ T ret; \ for (i = 0; i < limit; i++)\ {\ out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \ }\ } DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9) It will fail to vectorize because vectorizable_call checks that the operands are type compatible, but the imm will be (const_int 9) with SImode, which is different from _2 (DImode). Aka: uint64_t _1; uint64_t _2; _1 = .SAT_ADD (_2, 9); This patch would like to reconcile the imm operand to the operand type mode of _2 by fold_convert to make the vectorizable_call happy. The test suites below passed for this patch: 1. The rv64gcv full regression tests. 2. The x86 bootstrap tests. 3. The x86 full regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_sat_add_pattern): Add fold convert for const_int to the type of operand 0. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: New test. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: New test. Signed-off-by: Pan Li --- .../binop/vec_sat_u_add_imm_reconcile-1.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-10.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-11.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-12.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-13.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-14.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-15.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-2.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-3.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-4.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-5.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-6.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-7.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-8.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-9.c | 9 + .../riscv/rvv/autovec/vec_sat_arith.h | 20 +++ gcc/tree-vect-patterns.cc | 3 +++ 17 files changed, 158 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.
[PATCH v4] Match: Support form 1 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 1 of the scalar signed integer .SAT_ADD. Aka below example: Form 1: #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) We can tell the difference before and after this patch if the backend implements the ssadd<m>3 pattern, similar to below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t sum; 8 │ long unsigned int x.0_1; 9 │ long unsigned int y.1_2; 10 │ long unsigned int _3; 11 │ long int _4; 12 │ long int _5; 13 │ int64_t _6; 14 │ _Bool _11; 15 │ long int _12; 16 │ long int _13; 17 │ long int _14; 18 │ long int _16; 19 │ long int _17; 20 │ 21 │ ;; basic block 2, loop depth 0 22 │ ;;pred: ENTRY 23 │ x.0_1 = (long unsigned int) x_7(D); 24 │ y.1_2 = (long unsigned int) y_8(D); 25 │ _3 = x.0_1 + y.1_2; 26 │ sum_9 = (int64_t) _3; 27 │ _4 = x_7(D) ^ y_8(D); 28 │ _5 = x_7(D) ^ sum_9; 29 │ _17 = ~_4; 30 │ _16 = _5 & _17; 31 │ if (_16 < 0) 32 │ goto <bb 3>; [41.00%] 33 │ else 34 │ goto <bb 4>; [59.00%] 35 │ ;;succ: 3 36 │ ;;4 37 │ 38 │ ;; basic block 3, loop depth 0 39 │ ;;pred: 2 40 │ _11 = x_7(D) < 0; 41 │ _12 = (long int) _11; 42 │ _13 = -_12; 43 │ _14 = _13 ^ 9223372036854775807; 44 │ ;;succ: 4 45 │ 46 │ ;; basic block 4, loop depth 0 47 │ ;;pred: 2 48 │ ;;3 49 │ # _6 = PHI <sum_9(2), _14(3)> 50 │ return _6; 51 │ ;;succ: EXIT 52 │ 53 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t _4; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] 12 │ return _4; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. 
* The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the matching for signed .SAT_ADD. * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new matching func decl. (match_unsigned_saturation_add): Try signed .SAT_ADD and rename to ... (match_saturation_add): ... here. (math_opts_dom_walker::after_dom_children): Update the above renamed func from caller. Signed-off-by: Pan Li --- gcc/match.pd | 15 +++ gcc/tree-ssa-math-opts.cc | 35 ++- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 78f1957e8c7..09a36159163 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3192,6 +3192,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0 +/* Signed saturation add, case 1: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ sum) & !(X ^ Y) < 0 ? (-(T)(X < 0) ^ MAX) : sum; + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? 
X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 8d96a4c964b..3c93fca5b53 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); + static void build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4
From: Pan Li This patch would like to add test cases for the unsigned scalar .SAT_SUB IMM form 4. Aka: Form 4: #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ { \ return x > (T)IMM ? x - (T)IMM : 0; \ } DEF_SAT_U_SUB_IMM_FMT_4(uint64_t, 23) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-13.c: New test. * gcc.target/riscv/sat_u_sub_imm-13_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-13_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-14.c: New test. * gcc.target/riscv/sat_u_sub_imm-14_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-14_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-15.c: New test. * gcc.target/riscv/sat_u_sub_imm-15_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-15_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-16.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-13.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-14.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-15.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-16.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 9 +++ .../gcc.target/riscv/sat_u_sub_imm-13.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-13_1.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-13_2.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-14.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-14_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-14_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-15.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-15_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-15_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-16.c | 18 ++ .../gcc.target/riscv/sat_u_sub_imm-run-13.c | 55 +++ .../gcc.target/riscv/sat_u_sub_imm-run-14.c | 55 +++ .../gcc.target/riscv/sat_u_sub_imm-run-15.c | 54 ++ .../gcc.target/riscv/sat_u_sub_imm-run-16.c | 48 15 files changed, 421 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-16.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index b4339eb0dff..a899979904b 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -238,6 +238,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ return (T)IMM > y ? (T)IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ +{ \ + return x > (T)IMM ? x - (T)IMM : 0; \ +} + #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y) #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y) #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y) @@ -257,6 +264,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () #define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort () +#define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ + if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () /**/ /* Saturation Truncate (unsigned and signed) */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c new file mode 100644 index 000..7dcbc3b1a12 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-opt
[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3
From: Pan Li This patch would like to add test cases for the unsigned scalar .SAT_SUB IMM form 3. Aka: Form 3: #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ { \ return (T)IMM > y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 23) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-10.c: New test. * gcc.target/riscv/sat_u_sub_imm-10_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-10_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-11.c: New test. * gcc.target/riscv/sat_u_sub_imm-11_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-11_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-12.c: New test. * gcc.target/riscv/sat_u_sub_imm-9.c: New test. * gcc.target/riscv/sat_u_sub_imm-9_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-9_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-10.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-11.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-12.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-9.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 9 +++ .../gcc.target/riscv/sat_u_sub_imm-10.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-10_1.c | 22 .../gcc.target/riscv/sat_u_sub_imm-10_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-11.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-11_1.c | 22 .../gcc.target/riscv/sat_u_sub_imm-11_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-12.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-9.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-9_1.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-9_2.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-run-10.c | 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-11.c | 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-12.c | 48 .../gcc.target/riscv/sat_u_sub_imm-run-9.c| 56 +++ 15 files changed, 432 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index c8ff8320d82..b4339eb0dff 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -231,6 +231,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ return x >= (T)IMM ? x - (T)IMM : 0; \ } +#define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ +{ \ + return (T)IMM > y ? (T)IMM - y : 0; \ +} + #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y) #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y) #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y) @@ -248,6 +255,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ if (sat_u_sub_imm##IMM##_##T##_fmt_1(y) != expect) __builtin_abort () #define RUN_SAT_U_SUB_IMM_FMT_2(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () +#define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \ + if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort () /**/ /* Saturation Truncate (unsigned and signed) */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c new file mode 100644 index 000..db450d7cfbf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options
[PATCH v2] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD
From: Pan Li The .SAT_ADD has 2 operands, one of which may be an INTEGER_CST. For example _1 = .SAT_ADD (_2, 9) comes from below sample code. Form 3: #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ T ret; \ for (i = 0; i < limit; i++)\ {\ out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \ }\ } DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9) It will fail to vectorize because vectorizable_call checks that the operands are type compatible, but the imm will be (const_int 9) with the SImode, which is different from _2 (DImode). Aka: uint64_t _1; uint64_t _2; _1 = .SAT_ADD (_2, 9); This patch would like to reconcile the imm operand to the operand type mode of _2 if and only if there is no precision/data loss. Aka convert the imm 9 to the DImode for above example. The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The rv64gcv build with glibc. 3. The x86 bootstrap tests. 4. The x86 fully regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_reconcile_cst_to_unsigned): Add new func impl to reconcile the cst int type to given TREE type. (vect_recog_sat_add_pattern): Reconcile the ops of .SAT_ADD before building the gimple call. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: New test. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: New test. Signed-off-by: Pan Li --- .../binop/vec_sat_u_add_imm_reconcile-1.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-10.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-11.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-12.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-13.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-14.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-15.c| 9 + .../binop/vec_sat_u_add_imm_reconcile-2.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-3.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-4.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-5.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-6.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-7.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-8.c | 9 + .../binop/vec_sat_u_add_imm_reconcile-9.c | 9 + .../riscv/rvv/autovec/vec_sat_arith.h | 20 ++ gcc/tree-vect-patterns.cc | 38 +++ 17 files changed, 193 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c create mode 100644 gcc/testsuite/gcc.tar
[PATCH v1] RISC-V: Support IMM for operand 1 of ussub pattern
From: Pan Li This patch would like to allow IMM for the operand 1 of ussub pattern. Aka .SAT_SUB(x, 22) as the below example. Form 2: #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ { \ return x >= (T)IMM ? x - (T)IMM : 0; \ } DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1022) It is almost the same as supporting imm for operand 0 of the ussub pattern, but allows the second operand to be imm instead of the first operand. The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_ussub): Gen xmode for the second operand, aka y in parameter. * config/riscv/riscv.md (ussub3): Allow const_int for operand 2. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-5.c: New test. * gcc.target/riscv/sat_u_sub_imm-5_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-5_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-6.c: New test. * gcc.target/riscv/sat_u_sub_imm-6_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-6_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-7.c: New test. * gcc.target/riscv/sat_u_sub_imm-7_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-7_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-8.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-5.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-6.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-7.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-8.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 2 +- gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 9 +++ .../gcc.target/riscv/sat_u_sub_imm-5.c| 19 +++ .../gcc.target/riscv/sat_u_sub_imm-5_1.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-5_2.c | 19 +++ .../gcc.target/riscv/sat_u_sub_imm-6.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-6_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-6_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-7.c| 19 +++ .../gcc.target/riscv/sat_u_sub_imm-7_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-7_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-8.c| 18 ++ .../gcc.target/riscv/sat_u_sub_imm-run-5.c| 55 +++ .../gcc.target/riscv/sat_u_sub_imm-run-6.c| 55 +++ .../gcc.target/riscv/sat_u_sub_imm-run-7.c| 54 ++ .../gcc.target/riscv/sat_u_sub_imm-run-8.c| 48 17 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-8.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 90a6e936558..1f544c1287e 100644 --- a/gcc/config/riscv/riscv.cc +++ 
b/gcc/config/riscv/riscv.cc @@ -11965,7 +11965,7 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y) { machine_mode mode = GET_MODE (dest); rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode); - rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode); rtx xmode_lt = gen_reg_rtx (Xmode); rtx xmode_minus = gen_reg_rtx (Xmode); rtx xmode_dest = gen_reg_rtx (Xmode); diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index a94705a8e7c..3289ed2155a 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4370,7 +4370,7 @@ (define_expand "usadd3" (define_expand "ussub3" [(match_operand:ANYI 0 "register_operand") (match_operand:ANYI 1 "reg_or_int_operand") - (match_operand:ANYI 2 "register_operand")] + (match_operand:ANYI 2 "reg_or_int_operand")] "" { riscv_expand_ussub (operands[0], operands[1], operands[2]); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.
[PATCH v2] Match: Add int type fits check for .SAT_ADD imm operand
From: Pan Li This patch would like to add strict check for imm operand of .SAT_ADD matching. We had no type checking for the imm operand previously, which may result in unexpected IL being caught by the .SAT_ADD pattern. We leverage the int_fits_type_p here to make sure the imm operand is an int type that fits the result type of the .SAT_ADD. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_ADD (a, 12); uint8_t sum = .SAT_ADD (a, 12u); uint8_t sum = .SAT_ADD (a, 126u); uint8_t sum = .SAT_ADD (a, 128u); uint8_t sum = .SAT_ADD (a, 228); uint8_t sum = .SAT_ADD (a, 223u); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_ADD (a, -1); uint8_t sum = .SAT_ADD (a, 256u); uint8_t sum = .SAT_ADD (a, 257); The below test suites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_ADD imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm. * gcc.target/riscv/sat_u_add_imm-11.c: Ditto. * gcc.target/riscv/sat_u_add_imm-12.c: Ditto. * gcc.target/riscv/sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/sat_u_add_imm-16.c: Ditto. * gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-10.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-11.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-12.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-13.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-14.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-15.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-16.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-17.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-18.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-19.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test. 
* gcc.target/riscv/sat_u_add_imm_type_check-20.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-21.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-22.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-23.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-24.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-25.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-26.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-27.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-28.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-29.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-30.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-31.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-32.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-33.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-34.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-35.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-36.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-37.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-38.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-39.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-40.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-41.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-42.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-43.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-44.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-45.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-46.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-47.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-48.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-49.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test. 
* gcc.target/riscv/sat_u_add_imm_type_check-50.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-51.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-52.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-6.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-7.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-8.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-9.c: New test. Signed-off-by: Pan Li --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 16 ++
[PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 1 of the scalar signed integer .SAT_ADD. Aka below example: Form 1: #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t sum; 8 │ long unsigned int x.0_1; 9 │ long unsigned int y.1_2; 10 │ long unsigned int _3; 11 │ long int _4; 12 │ long int _5; 13 │ int64_t _6; 14 │ _Bool _11; 15 │ long int _12; 16 │ long int _13; 17 │ long int _14; 18 │ long int _16; 19 │ long int _17; 20 │ 21 │ ;; basic block 2, loop depth 0 22 │ ;;pred: ENTRY 23 │ x.0_1 = (long unsigned int) x_7(D); 24 │ y.1_2 = (long unsigned int) y_8(D); 25 │ _3 = x.0_1 + y.1_2; 26 │ sum_9 = (int64_t) _3; 27 │ _4 = x_7(D) ^ y_8(D); 28 │ _5 = x_7(D) ^ sum_9; 29 │ _17 = ~_4; 30 │ _16 = _5 & _17; 31 │ if (_16 < 0) 32 │ goto ; [41.00%] 33 │ else 34 │ goto ; [59.00%] 35 │ ;;succ: 3 36 │ ;;4 37 │ 38 │ ;; basic block 3, loop depth 0 39 │ ;;pred: 2 40 │ _11 = x_7(D) < 0; 41 │ _12 = (long int) _11; 42 │ _13 = -_12; 43 │ _14 = _13 ^ 9223372036854775807; 44 │ ;;succ: 4 45 │ 46 │ ;; basic block 4, loop depth 0 47 │ ;;pred: 2 48 │ ;;3 49 │ # _6 = PHI 50 │ return _6; 51 │ ;;succ: EXIT 52 │ 53 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t _4; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] 12 │ return _4; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. 
* The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the matching for signed .SAT_ADD. * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new matching func decl. (match_unsigned_saturation_add): Try signed .SAT_ADD and rename to ... (match_saturation_add): ... here. (math_opts_dom_walker::after_dom_children): Update the above renamed func from caller. Signed-off-by: Pan Li --- gcc/match.pd | 18 ++ gcc/tree-ssa-math-opts.cc | 35 ++- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 78f1957e8c7..b059e313415 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0 +/* Signed saturation add, case 1: + T sum = (UT)X + (UT)Y; + SAT_S_ADD = (X ^ Y) < 0 + ? sum + : (sum ^ x) >= 0 + ? sum + : x < 0 ? MIN : MAX; + T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0) + (convert @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 8d96a4c964b..3c93fca5b53 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); + static void build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, inte
[PATCH v1 2/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4
From: Pan Li This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 4. Aka: Form 4: #define DEF_VEC_SAT_U_TRUNC_FMT_4(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ bool not_overflow = in[i] <= (WT)(NT)(-1); \ out[i] = ((NT)in[i]) | (NT)((NT)not_overflow - 1); \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_4 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/unop/vec_sat_u_trunc-19.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-20.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-21.c | 23 +++ .../rvv/autovec/unop/vec_sat_u_trunc-22.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-23.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-24.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-19.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-20.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-21.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-22.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-23.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-24.c | 16 + .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++ 13 files changed, 236 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c new file mode 100644 index 
000..a80cefe46ab --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_4: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_4 (uint8_t, uint16_t) + +/*
[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 4. Aka: Form 4: #define DEF_SAT_U_TRUNC_FMT_4(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x) \ { \ bool not_overflow = x <= (WT)(NT)(-1); \ return ((NT)x) | (NT)((NT)not_overflow - 1); \ } The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-19.c: New test. * gcc.target/riscv/sat_u_trunc-20.c: New test. * gcc.target/riscv/sat_u_trunc-21.c: New test. * gcc.target/riscv/sat_u_trunc-22.c: New test. * gcc.target/riscv/sat_u_trunc-23.c: New test. * gcc.target/riscv/sat_u_trunc-24.c: New test. * gcc.target/riscv/sat_u_trunc-run-19.c: New test. * gcc.target/riscv/sat_u_trunc-run-20.c: New test. * gcc.target/riscv/sat_u_trunc-run-21.c: New test. * gcc.target/riscv/sat_u_trunc-run-22.c: New test. * gcc.target/riscv/sat_u_trunc-run-23.c: New test. * gcc.target/riscv/sat_u_trunc-run-24.c: New test. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++ .../gcc.target/riscv/sat_u_trunc-19.c | 17 .../gcc.target/riscv/sat_u_trunc-20.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-21.c | 19 ++ .../gcc.target/riscv/sat_u_trunc-22.c | 17 .../gcc.target/riscv/sat_u_trunc-23.c | 17 .../gcc.target/riscv/sat_u_trunc-24.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-19.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-20.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-21.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-22.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-23.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-24.c | 16 +++ 13 files changed, 218 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-21.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-22.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-23.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-24.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-19.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-20.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-21.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-22.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-23.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-24.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 91853b60f59..229e1f0a5cd 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -245,6 +245,15 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ } #define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_4(NT, WT) \ +NT __attribute__((noinline)) \ 
+sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x) \ +{ \ + bool not_overflow = x <= (WT)(NT)(-1); \ + return ((NT)x) | (NT)((NT)not_overflow - 1); \ +} +#define DEF_SAT_U_TRUNC_FMT_4_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_4(NT, WT) + #define RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) @@ -254,4 +263,7 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ #define RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_3 (x) #define RUN_SAT_U_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) +#define RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_4 (x) +#define RUN_SAT_U_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c new file mode 100644 index 000..e61faffbbc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint16_t_to_uint8_t_fmt_4: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx]
[PATCH v1] Match: Add type check for .SAT_ADD imm operand
From: Pan Li This patch would like to add strict check for imm operand of .SAT_ADD matching. We had no type checking for the imm operand previously, which may result in unexpected IL being caught by the .SAT_ADD pattern. However, things may become more complicated due to the int promotion. This means any const_int without any suffix will be promoted to int before matching. For example as below. uint8_t a; uint8_t sum = .SAT_ADD (a, 12); The second operand will be (const_int 12) with int type when trying to match .SAT_ADD. Thus, to support int8/int16 .SAT_ADD, only the int32 and int64 will be strictly checked. The below test suites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add strict type check for .SAT_ADD imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm. * gcc.target/riscv/sat_u_add_imm-12.c: Ditto. * gcc.target/riscv/sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/sat_u_add_imm-16.c: Ditto. * gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-6.c: New test. 
Signed-off-by: Pan Li --- gcc/match.pd | 11 ++- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-16.c | 4 ++-- .../gcc.target/riscv/sat_u_add_imm_type_check-1.c | 9 + .../gcc.target/riscv/sat_u_add_imm_type_check-2.c | 9 + .../gcc.target/riscv/sat_u_add_imm_type_check-3.c | 9 + .../gcc.target/riscv/sat_u_add_imm_type_check-4.c | 9 + .../gcc.target/riscv/sat_u_add_imm_type_check-5.c | 9 + .../gcc.target/riscv/sat_u_add_imm_type_check-6.c | 9 + 11 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-6.c diff --git a/gcc/match.pd b/gcc/match.pd index 65a3aae2243..f695790629e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3190,7 +3190,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop) integer_minus_onep (realpart @2)) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @0 + && types_match (type, @0)) + (with +{ + unsigned precision = TYPE_PRECISION (type); + unsigned int_precision = HOST_BITS_PER_INT; +} +/* The const_int will perform int promotion, the const_int will have at + least the int_precision. Thus, type less than int_precision will be + skipped the type match checking. */ +(if (precision < int_precision || types_match (type, @1)) /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? 
X - Y : 0 */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c index 43f34b5f3c9..a246e9b1857 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c @@ -5,7 +5,7 @@ #include "sat_arith.h" /* -** sat_u_add_imm7_uint32_t_fmt_3: +** sat_u_add_imm7u_uint32_t_fmt_3: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** addi\s+[atx][0-9]+,\s*a0,\s*7 @@ -17,6 +17,6 @@ ** sext.w\s+a0,\s*a0 ** ret */ -DEF_SAT_U_ADD_IMM_FMT_3(uint32_t, 7) +DEF_SAT_U_ADD_IMM_FMT_3(uint32_t, 7u) /* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c index 561c127f5fa..143f14c3af0 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c @@ -5,13 +5,13 @@ #include "sat_arith.h" /* -** sat_u_add_imm8_uint64_t_fmt_3: +** sat_u_add_imm8ull_uint64_t_fmt_3: ** addi\s+[atx][0-9]+,\s*a0,\s*8 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** neg\s+[atx][0-9]+,\s*[atx][0-9]+ ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+ *
[PATCH v1 2/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3
From: Pan Li This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 3. Aka: Form 3: #define DEF_VEC_SAT_U_TRUNC_FMT_3(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT max = (WT)(NT)-1;\ out[i] = in[i] <= max ? (NT)in[i] : (NT)max;\ } \ } DEF_VEC_SAT_U_TRUNC_FMT_3 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/unop/vec_sat_u_trunc-13.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-14.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-15.c | 23 +++ .../rvv/autovec/unop/vec_sat_u_trunc-16.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-17.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-18.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-13.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-14.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-15.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-16.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-17.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-18.c | 16 + .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++ 13 files changed, 236 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c new file mode 100644 index 
000..49bdbdc3606 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_3: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_3 (uint8_t, uint16_t) + +/*
[PATCH v1 1/2] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2
From: Pan Li This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 2. Aka: Form 2: #define DEF_VEC_SAT_U_TRUNC_FMT_2(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT max = (WT)(NT)-1;\ out[i] = in[i] > max ? (NT)max : (NT)in[i]; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_2 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c: New test. 
Signed-off-by: Pan Li --- .../rvv/autovec/unop/vec_sat_u_trunc-10.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-11.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-12.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-7.c | 19 +++ .../rvv/autovec/unop/vec_sat_u_trunc-8.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-9.c | 23 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-10.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-11.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-12.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-7.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-8.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-9.c | 16 + .../riscv/rvv/autovec/vec_sat_arith.h | 18 +++ 13 files changed, 236 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c new file mode 100644 index 000..f5084e503eb --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_2: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_2 (uint16_t, uint32_t) + +/* { dg-fin
[PATCH v1] RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]
From: Pan Li Fix one typo `sat_truc` to `sat_trunc`, as well as `SAT_TRUC` to `SAT_TRUNC`. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Fix SAT_TRUNC typo. * gcc.target/riscv/sat_u_trunc-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-9.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-9.c: Ditto. 
Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 30 +-- .../gcc.target/riscv/sat_u_trunc-1.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-13.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-14.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-15.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-2.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-3.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-4.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-5.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-6.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-7.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-8.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-9.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-1.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-13.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-14.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-15.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-2.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-3.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-4.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-5.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-6.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-7.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-8.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-9.c | 4 +-- 25 files changed, 63 insertions(+), 63 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index cf055410fd1..91853b60f59 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -218,40 +218,40 @@ sat_u_sub_##T##_fmt_12 (T x, T y) \ /* Saturation Truncate (unsigned and signed) */ /**/ -#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_1(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ +sat_u_trunc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } -#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_1_WRAP(NT, 
WT) DEF_SAT_U_TRUNC_FMT_1(NT, WT) -#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_2(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ +sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } -#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_2(NT, WT) -#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_3(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ +sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } -#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT) -#define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u
[PATCH v1] Match: Support form 4 for unsigned integer .SAT_TRUNC
From: Pan Li This patch would like to support the form 4 of the unsigned integer .SAT_TRUNC. Aka below example: Form 4: #define DEF_SAT_U_TRUC_FMT_4(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_4 (WT x) \ { \ bool not_overflow = x <= (WT)(NT)(-1); \ return ((NT)x) | (NT)((NT)not_overflow - 1); \ } DEF_SAT_U_TRUC_FMT_4(uint32_t, uint64_t) Before this patch: 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x) 6 │ { 7 │ _Bool not_overflow; 8 │ unsigned char _1; 9 │ unsigned char _2; 10 │ unsigned char _3; 11 │ uint8_t _6; 12 │ 13 │ ;; basic block 2, loop depth 0 14 │ ;;pred: ENTRY 15 │ not_overflow_5 = x_4(D) <= 255; 16 │ _1 = (unsigned char) x_4(D); 17 │ _2 = (unsigned char) not_overflow_5; 18 │ _3 = _2 + 255; 19 │ _6 = _1 | _3; 20 │ return _6; 21 │ ;;succ: EXIT 22 │ 23 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x) 6 │ { 7 │ uint8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add form 4 for unsigned .SAT_TRUNC matching. Signed-off-by: Pan Li --- gcc/match.pd | 18 ++ 1 file changed, 18 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..5a2f777e65f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3311,6 +3311,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) +/* Unsigned saturation truncate, case 3, sizeof (WT) > sizeof (NT). + SAT_U_TRUNC = (NT)X | ((NT)(X <= (WT)-1) + (NT)-1). 
*/ +(match (unsigned_integer_sat_trunc @0) + (bit_ior:c (plus:c (convert (le @0 INTEGER_CST@1)) INTEGER_CST@2) + (convert @0)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0))) + (with + { + unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned otype_precision = TYPE_PRECISION (type); + wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); + wide_int max = wi::mask (otype_precision, false, otype_precision); + wide_int int_cst_1 = wi::to_wide (@1); + wide_int int_cst_2 = wi::to_wide (@2); + } + (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2)) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) -- 2.43.0
[PATCH v3] RISC-V: Support IMM for operand 0 of ussub pattern
From: Pan Li This patch allows an IMM for operand 0 of the ussub pattern, i.e. .SAT_SUB(1023, y), as in the example below. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023) Before this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ bgtu a0,a5,.L3 13 │ sub a0,a5,a0 14 │ ret 15 │ .L3: 16 │ li a0,0 17 │ ret After this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ sltu a4,a5,a0 13 │ addi a4,a4,-1 14 │ sub a0,a5,a0 15 │ and a0,a4,a0 16 │ ret The test suites below pass with this patch: 1. The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new func impl to gen xmode rtx reg from operand rtx. (riscv_expand_ussub): Gen xmode reg for operand 1. * config/riscv/riscv.md: Allow const_int for operand 1. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_u_sub_imm-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-4.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-4.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 46 ++- gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_1.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_2.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-3_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-3_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++ .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 17 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f266c45ed4d..5e6f3ba10e4 100644 --- a/gcc/config/riscv/riscv.cc +++ 
b/gcc/config/riscv/riscv.cc @@ -11893,6 +11893,50 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Generate a REG rtx of Xmode from the given rtx and mode. + The rtx x can be REG (QI/HI/SI/DI) or const_int. + The machine_mode mode is the original mode from define pattern. + + If rtx is REG, the gen_lowpart of Xmode will be returned. + + If rtx is const_int, a new REG rtx will be created to hold the value of + const_int and then returned. + + According to the gccint doc, the constants generated for modes with fewer + bits than in HOST_WIDE_INT
[PATCH v2] Test: Move pr116278 run test to dg/torture [NFC]
From: Pan Li Move the run test of pr116278 to dg/torture and leave the risc-v the asm check under risc-v part. PR target/116278 gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: Take compile instead of run. * gcc.target/riscv/pr116278-run-2.c: Ditto. * gcc.dg/torture/pr116278-run-1.c: New test. * gcc.dg/torture/pr116278-run-2.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.dg/torture/pr116278-run-1.c | 19 +++ gcc/testsuite/gcc.dg/torture/pr116278-run-2.c | 19 +++ .../gcc.target/riscv/pr116278-run-1.c | 2 +- .../gcc.target/riscv/pr116278-run-2.c | 2 +- 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr116278-run-1.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr116278-run-2.c diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c new file mode 100644 index 000..8e07fb6af29 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32 } */ +/* { dg-options "-O2" } */ + +#include + +int8_t b[1]; +int8_t *d = b; +int32_t c; + +int main() { + b[0] = -40; + uint16_t t = (uint16_t)d[0]; + + c = (t < 0xFFF6 ? t : 0xFFF6) + 9; + + if (c != 65505) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c new file mode 100644 index 000..d85e21531e1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32 } */ +/* { dg-options "-O2" } */ + +#include + +int16_t b[1]; +int16_t *d = b; +int64_t c; + +int main() { + b[0] = -40; + uint32_t t = (uint32_t)d[0]; + + c = (t < 0xFFF6u ? 
t : 0xFFF6u) + 9; + + if (c != 4294967265) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c index d3812bdcdfb..c758fca7975 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ #include diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c index 669cd4f003f..a4da8a323f0 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ #include -- 2.43.0
[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } QUAD: DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t) DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-10.c: New test. * gcc.target/riscv/sat_u_trunc-11.c: New test. * gcc.target/riscv/sat_u_trunc-12.c: New test. * gcc.target/riscv/sat_u_trunc-run-10.c: New test. * gcc.target/riscv/sat_u_trunc-run-11.c: New test. * gcc.target/riscv/sat_u_trunc-run-12.c: New test. Signed-off-by: Pan Li --- .../gcc.target/riscv/sat_u_trunc-10.c | 17 .../gcc.target/riscv/sat_u_trunc-11.c | 17 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++ 6 files changed, 102 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c new file mode 100644 index 000..7dfc740c54f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c new file mode 100644 index 000..c50ae96f47d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c new file mode 100644 index 000..61331cee6fa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c new file mode 100644 index 000..4bc9303e457 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUC_FMT_2_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define T
[PATCH v1] Test: Move pr116278 run test to c-torture [NFC]
From: Pan Li Move the run test of pr116278 to c-torture and leave the risc-v the asm check under risc-v part. PR target/116278 gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: Take compile instead of run test. * gcc.target/riscv/pr116278-run-2.c: Ditto. * gcc.c-torture/execute/pr116278-run-1.c: New test. * gcc.c-torture/execute/pr116278-run-2.c: New test. Signed-off-by: Pan Li --- .../gcc.c-torture/execute/pr116278-run-1.c | 18 ++ .../gcc.c-torture/execute/pr116278-run-2.c | 18 ++ .../gcc.target/riscv/pr116278-run-1.c | 2 +- .../gcc.target/riscv/pr116278-run-2.c | 2 +- 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c diff --git a/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c new file mode 100644 index 000..fa5340c9d58 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-1.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include + +int8_t b[1]; +int8_t *d = b; +int32_t c; + +int main() { + b[0] = -40; + uint16_t t = (uint16_t)d[0]; + + c = (t < 0xFFF6 ? t : 0xFFF6) + 9; + + if (c != 65505) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c new file mode 100644 index 000..65439d614a1 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr116278-run-2.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include + +int16_t b[1]; +int16_t *d = b; +int64_t c; + +int main() { + b[0] = -40; + uint32_t t = (uint32_t)d[0]; + + c = (t < 0xFFF6u ? 
t : 0xFFF6u) + 9; + + if (c != 4294967265) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c index d3812bdcdfb..c758fca7975 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ #include diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c index 669cd4f003f..a4da8a323f0 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ #include -- 2.43.0
[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 3. Aka: Form 3: #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } QUAD: DEF_SAT_U_TRUC_FMT_3 (uint16_t, uint64_t) DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-16.c: New test. * gcc.target/riscv/sat_u_trunc-17.c: New test. * gcc.target/riscv/sat_u_trunc-18.c: New test. * gcc.target/riscv/sat_u_trunc-run-16.c: New test. * gcc.target/riscv/sat_u_trunc-run-17.c: New test. * gcc.target/riscv/sat_u_trunc-run-18.c: New test. Signed-off-by: Pan Li --- .../gcc.target/riscv/sat_u_trunc-16.c | 17 .../gcc.target/riscv/sat_u_trunc-17.c | 17 .../gcc.target/riscv/sat_u_trunc-18.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-16.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-17.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-18.c | 16 +++ 6 files changed, 102 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c new file mode 100644 index 000..3ee7dc03ade --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c new file mode 100644 index 000..975853712cd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c new file mode 100644 index 000..11e34ae6fd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c new file mode 100644 index 000..3edcf137a79 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUC_FMT_3_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define T
[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2
From: Pan Li This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } QUAD: DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t) DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-10.c: New test. * gcc.target/riscv/sat_u_trunc-11.c: New test. * gcc.target/riscv/sat_u_trunc-12.c: New test. * gcc.target/riscv/sat_u_trunc-run-10.c: New test. * gcc.target/riscv/sat_u_trunc-run-11.c: New test. * gcc.target/riscv/sat_u_trunc-run-12.c: New test. Signed-off-by: Pan Li --- .../gcc.target/riscv/sat_u_trunc-10.c | 17 .../gcc.target/riscv/sat_u_trunc-11.c | 17 .../gcc.target/riscv/sat_u_trunc-12.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-run-10.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-11.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-12.c | 16 +++ 6 files changed, 102 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c new file mode 100644 index 000..7dfc740c54f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c new file mode 100644 index 000..c50ae96f47d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c new file mode 100644 index 000..61331cee6fa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint64_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c new file mode 100644 index 000..4bc9303e457 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUC_FMT_2_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define T
[PATCH v1 2/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3
From: Pan Li This patch would like to add test cases for the unsigned scalar .SAT_TRUNC form 3. Aka: Form 3: #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } DEF_SAT_U_TRUC_FMT_3 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-13.c: New test. * gcc.target/riscv/sat_u_trunc-14.c: New test. * gcc.target/riscv/sat_u_trunc-15.c: New test. * gcc.target/riscv/sat_u_trunc-run-13.c: New test. * gcc.target/riscv/sat_u_trunc-run-14.c: New test. * gcc.target/riscv/sat_u_trunc-run-15.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++ .../gcc.target/riscv/sat_u_trunc-13.c | 17 .../gcc.target/riscv/sat_u_trunc-14.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-15.c | 19 ++ .../gcc.target/riscv/sat_u_trunc-run-13.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-14.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-15.c | 16 +++ 7 files changed, 116 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-13.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-14.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-15.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 576a4926d1f..cf055410fd1 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -236,10 +236,22 @@ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ } #define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) +#define DEF_SAT_U_TRUC_FMT_3(NT, 
WT) \ +NT __attribute__((noinline)) \ +sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ +{\ + WT max = (WT)(NT)-1; \ + return x <= max ? (NT)x : (NT) max;\ +} +#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT) + #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x) #define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x) #define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x) +#define RUN_SAT_U_TRUC_FMT_3(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_3 (x) +#define RUN_SAT_U_TRUC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_3(NT, WT, x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c new file mode 100644 index 000..58910793a80 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint16_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c new file mode 100644 index 000..236ea1d45f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** 
sat_u_truc_uint32_t_to_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c new file mode 100644 index 000.
[PATCH v1 1/2] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2
From: Pan Li This patch would like to add test cases for the unsigned scalar .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } DEF_SAT_U_TRUC_FMT_2 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-7.c: New test. * gcc.target/riscv/sat_u_trunc-8.c: New test. * gcc.target/riscv/sat_u_trunc-9.c: New test. * gcc.target/riscv/sat_u_trunc-run-7.c: New test. * gcc.target/riscv/sat_u_trunc-run-8.c: New test. * gcc.target/riscv/sat_u_trunc-run-9.c: New test. Signed-off-by: Pan Li --- gcc/testsuite/gcc.target/riscv/sat_arith.h| 12 +++ .../gcc.target/riscv/sat_u_trunc-7.c | 17 .../gcc.target/riscv/sat_u_trunc-8.c | 20 +++ .../gcc.target/riscv/sat_u_trunc-9.c | 19 ++ .../gcc.target/riscv/sat_u_trunc-run-7.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-8.c | 16 +++ .../gcc.target/riscv/sat_u_trunc-run-9.c | 16 +++ 7 files changed, 116 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 37e0a60f21b..576a4926d1f 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -227,7 +227,19 @@ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ } #define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT) +#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ +NT 
__attribute__((noinline)) \ +sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ +{\ + WT max = (WT)(NT)-1; \ + return x > max ? (NT) max : (NT)x; \ +} +#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) + #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x) +#define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x) +#define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c new file mode 100644 index 000..95d513a15fb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint16_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c new file mode 100644 index 000..f168912293d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** 
addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c new file mode 100644 index 000..d82363d6aef --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl
[PATCH v4] RISC-V: Make sure high bits of usadd operands are clean for non-Xmode [PR116278]
From: Pan Li For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the high bits of the Xmode register may be all ones, which is not expected. For example, consider the code below. signed char b[1]; unsigned short c; signed char *d = b; int main() { b[0] = -40; c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9; __builtin_printf("%d\n", c); } After expanding we have: ;; _6 = .SAT_ADD (_3, 9); (insn 8 7 9 (set (reg:DI 143) (high:DI (symbol_ref:DI ("d") [flags 0x86] ))) (nil)) (insn 9 8 10 (set (reg/f:DI 142) (mem/f/c:DI (lo_sum:DI (reg:DI 143) (symbol_ref:DI ("d") [flags 0x86] )) [1 d+0 S8 A64])) (nil)) (insn 10 9 11 (set (reg:HI 144 [ _3 ]) (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) "test.c":7:10 -1 (nil)) The conversion from signed char to unsigned short is expanded to a sign_extend RTL as above, which finally becomes the lb insn below: lb a1,0(a5) // a1 is -40, aka 0xffffffffffffffd8 lui a0,0x1a addi a5,a1,9 slli a5,a5,0x30 srli a5,a5,0x30 // a5 is 65505 sltu a1,a5,a1 // compare 65505 and 0xffffffffffffffd8 => TRUE The sltu compares 65505 with 0xffffffffffffffd8 here, but we actually want to compare 65505 with 65496 (0xffd8). Thus we need to clear the high bits of the operand to ensure this. The test suites below pass with this patch: * The rv64gcv full regression test. PR target/116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new func impl to zero extend rtx. (riscv_expand_usadd): Leverage above func to cleanup operands and sum. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: New test. * gcc.target/riscv/pr116278-run-2.c: New test. PR 116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new func impl to zero extend rtx. (riscv_expand_usadd): Leverage above func to cleanup operands 0 and remove the special handling for SImode in RV64. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add-11.c: Adjust asm check body. * gcc.target/riscv/sat_u_add-15.c: Ditto. * gcc.target/riscv/sat_u_add-19.c: Ditto. 
* gcc.target/riscv/sat_u_add-23.c: Ditto. * gcc.target/riscv/sat_u_add-3.c: Ditto. * gcc.target/riscv/sat_u_add-7.c: Ditto. * gcc.target/riscv/sat_u_add_imm-11.c: Ditto. * gcc.target/riscv/sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/sat_u_add_imm-3.c: Ditto. * gcc.target/riscv/sat_u_add_imm-7.c: Ditto. * gcc.target/riscv/pr116278-run-1.c: New test. * gcc.target/riscv/pr116278-run-2.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 34 --- .../gcc.target/riscv/pr116278-run-1.c | 20 +++ .../gcc.target/riscv/pr116278-run-2.c | 20 +++ gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-3.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-7.c | 6 +++- .../gcc.target/riscv/sat_u_add_imm-11.c | 6 +++- .../gcc.target/riscv/sat_u_add_imm-15.c | 6 +++- .../gcc.target/riscv/sat_u_add_imm-3.c| 6 +++- .../gcc.target/riscv/sat_u_add_imm-7.c| 6 +++- 13 files changed, 112 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1f60d8f9711..453a061428e 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11801,12 +11801,29 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. + The rtx x will be zero extended to Xmode if the mode is HI/QImode, and + the new zero extended Xmode rtx will be returned. + Or the gen_lowpart rtx of Xmode will be returned. 
*/ + +static rtx +riscv_gen_zero_extend_rtx (rtx x, machine_mode mode) +{ + if (mode == Xmode) +return x; + + rtx xmode_reg = gen_reg_rtx (Xmode); + riscv_emit_unary (ZERO_EXTEND, xmode_reg, x); + + return xmode_reg; +} + /* Implements the unsigned saturation add standard name usadd for int mode. z = SAT_ADD(x, y). => 1. sum = x + y. - 2. sum = truncate (sum) for QI and HI only. + 2. sum = truncate (sum) for non-Xmode. 3. lt = sum < x. 4. lt = -lt. 5. z = sum | lt. */ @@ -11817,22 +11834,15 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) machine_mode mode = GET_MODE (dest); rtx xmode_sum = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); - rtx xmode_x = gen_lowpart (Xmode, x)
[PATCH v3] RISC-V: Make sure high bits of usadd operands are clean for HI/QI [PR116278]
From: Pan Li For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the high bits of the Xmode register may be all ones, which is not expected. For example, consider the code below. signed char b[1]; unsigned short c; signed char *d = b; int main() { b[0] = -40; c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9; __builtin_printf("%d\n", c); } After expanding we have: ;; _6 = .SAT_ADD (_3, 9); (insn 8 7 9 (set (reg:DI 143) (high:DI (symbol_ref:DI ("d") [flags 0x86] ))) (nil)) (insn 9 8 10 (set (reg/f:DI 142) (mem/f/c:DI (lo_sum:DI (reg:DI 143) (symbol_ref:DI ("d") [flags 0x86] )) [1 d+0 S8 A64])) (nil)) (insn 10 9 11 (set (reg:HI 144 [ _3 ]) (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) "test.c":7:10 -1 (nil)) The conversion from signed char to unsigned short is expanded to a sign_extend RTL as above, which finally becomes the lb insn below: lb a1,0(a5) // a1 is -40, aka 0xffffffffffffffd8 lui a0,0x1a addi a5,a1,9 slli a5,a5,0x30 srli a5,a5,0x30 // a5 is 65505 sltu a1,a5,a1 // compare 65505 and 0xffffffffffffffd8 => TRUE The sltu compares 65505 with 0xffffffffffffffd8 here, but we actually want to compare 65505 with 65496 (0xffd8). Thus we need to clear the high bits of the operand to ensure this. The test suites below pass with this patch: * The rv64gcv full regression test. PR target/116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new func impl to zero extend rtx. (riscv_expand_usadd): Leverage above func to cleanup operands and sum. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: New test. * gcc.target/riscv/pr116278-run-2.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 19 +- .../gcc.target/riscv/pr116278-run-1.c | 20 +++ .../gcc.target/riscv/pr116278-run-2.c | 20 +++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index a1b09e865ea..9793166dc5b 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11801,6 +11801,23 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. + The rtx x will be zero extended to Xmode if the mode is HI/QImode, and + the new zero extended Xmode rtx will be returned. + Or the gen_lowpart rtx of Xmode will be returned. */ + +static rtx +riscv_gen_zero_extend_rtx (rtx x, machine_mode mode) +{ + if (mode != HImode && mode != QImode) +return gen_lowpart (Xmode, x); + + rtx xmode_reg = gen_reg_rtx (Xmode); + riscv_emit_unary (ZERO_EXTEND, xmode_reg, x); + + return xmode_reg; +} + /* Implements the unsigned saturation add standard name usadd for int mode. z = SAT_ADD(x, y). 
@@ -11817,7 +11834,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) machine_mode mode = GET_MODE (dest); rtx xmode_sum = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); - rtx xmode_x = gen_lowpart (Xmode, x); + rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode); rtx xmode_y = gen_lowpart (Xmode, y); rtx xmode_dest = gen_reg_rtx (Xmode); diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c new file mode 100644 index 000..d3812bdcdfb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c @@ -0,0 +1,20 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-options "-O2 -fdump-rtl-expand-details" } */ + +#include + +int8_t b[1]; +int8_t *d = b; +int32_t c; + +int main() { + b[0] = -40; + uint16_t t = (uint16_t)d[0]; + + c = (t < 0xFFF6 ? t : 0xFFF6) + 9; + + if (c != 65505) +__builtin_abort (); +} + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c new file mode 100644 index 000..669cd4f003f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c @@ -0,0 +1,20 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-options "-O2 -fdump-rtl-expand-details" } */ + +#include + +int16_t b[1]; +int16_t *d = b; +int64_t c; + +int main() { + b[0] = -40; + uint32_t t = (uint32_t)d[0]; + + c = (t < 0xFFF6u ? t : 0xFFF6u) + 9; + + if (c != 4294967265) +__builtin_abort (); +} + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ -- 2.43.0
[PATCH v2] RISC-V: Make sure high bits of usadd operands are clean for HI/QI [PR116278]
From: Pan Li For QI/HImode of .SAT_ADD, the operands may be sign-extended and the high bits of Xmode may be all 1 which is not expected. For example as below code. signed char b[1]; unsigned short c; signed char *d = b; int main() { b[0] = -40; c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9; __builtin_printf("%d\n", c); } After expanding we have: ;; _6 = .SAT_ADD (_3, 9); (insn 8 7 9 (set (reg:DI 143) (high:DI (symbol_ref:DI ("d") [flags 0x86] ))) (nil)) (insn 9 8 10 (set (reg/f:DI 142) (mem/f/c:DI (lo_sum:DI (reg:DI 143) (symbol_ref:DI ("d") [flags 0x86] )) [1 d+0 S8 A64])) (nil)) (insn 10 9 11 (set (reg:HI 144 [ _3 ]) (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) "test.c":7:10 -1 (nil)) The convert from signed char to unsigned short will have sign_extend rtl as above. And finally become the lb insn as below: lb a1,0(a5) // a1 is -40, aka 0xffd8 lui a0,0x1a addia5,a1,9 sllia5,a5,0x30 srlia5,a5,0x30 // a5 is 65505 sltua1,a5,a1 // compare 65505 and 0xffd8 => TRUE The sltu try to compare 65505 and 0xffd8 here, but we actually want to compare 65505 and 65496 (0xffd8). Thus we need to clean up the high bits to ensure this. The below test suites are passed for this patch: * The rv64gcv fully regression test. PR target/116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new func impl to zero extend rtx. (riscv_expand_usadd): Leverage above func to cleanup operands and sum. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 19 ++- .../gcc.target/riscv/pr116278-run-1.c | 16 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 5fe4273beb7..cfdb3d82972 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11564,6 +11564,23 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. + The rtx x will be zero extended to Xmode if the mode is HI/QImode, and + the new zero extended Xmode rtx will be returned. + Or the gen_lowpart rtx of Xmode will be returned. */ + +static rtx +riscv_gen_zero_extend_rtx (rtx x, machine_mode mode) +{ + if (mode != HImode && mode != QImode) +return gen_lowpart (Xmode, x); + + rtx xmode_reg = gen_reg_rtx (Xmode); + riscv_emit_unary (ZERO_EXTEND, xmode_reg, x); + + return xmode_reg; +} + /* Implements the unsigned saturation add standard name usadd for int mode. z = SAT_ADD(x, y). @@ -11580,7 +11597,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) machine_mode mode = GET_MODE (dest); rtx xmode_sum = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); - rtx xmode_x = gen_lowpart (Xmode, x); + rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode); rtx xmode_y = gen_lowpart (Xmode, y); rtx xmode_dest = gen_reg_rtx (Xmode); diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c new file mode 100644 index 000..f6268e290ec --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +signed char b[1]; +int c; +signed char *d = b; + +int main() { + b[0] = -40; + c = ({ +(unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; + }) + 9; + + if (c != 65505) +__builtin_abort (); +} -- 2.43.0
[PATCH v1] RISC-V: Bugfix incorrect operand for vwsll auto-vect
From: Pan Li This patch fixes an ICE for vwsll when compiling with rv64gcv_zvbb. Consider the example below. void vwsll_vv_test (short *restrict dst, char *restrict a, int *restrict b, int n) { for (int i = 0; i < n; i++) dst[i] = a[i] << b[i]; } It will hit the vwsll pattern with the following operands. operand 0 -> (reg:RVVMF2HI 146 [ vect__7.13 ]) operand 1 -> (reg:RVVMF4QI 165 [ vect_cst__33 ]) operand 2 -> (reg:RVVM1SI 171 [ vect_cst__36 ]) According to the ISA, operand 2 should have the same mode as operand 1. That is, operand 2 should have RVVMF4QI mode as above. Thus, add a quad truncation for operand 2 before emitting vwsll. The below test suites passed for this patch. * The rv64gcv full regression test. PR target/116280 gcc/ChangeLog: * config/riscv/autovec-opt.md: Add quad truncation to align the mode requirement for vwsll. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr116280-1.c: New test. * gcc.target/riscv/rvv/base/pr116280-2.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/autovec-opt.md| 4 .../gcc.target/riscv/rvv/base/pr116280-1.c | 14 ++ .../gcc.target/riscv/rvv/base/pr116280-2.c | 10 ++ 3 files changed, 28 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index d7a3cfd4602..4b33a145c17 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1546,6 +1546,10 @@ (define_insn_and_split "*vwsll_zext1_trunc_" "&& 1" [(const_int 0)] { +rtx truncated = gen_reg_rtx (mode); +emit_insn (gen_trunc2 (truncated, operands[2])); +operands[2] = truncated; + insn_code icode = code_for_pred_vwsll (mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); DONE; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c new file mode 100644 index 000..8b8547e2c34 --- /dev/null 
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-1.c @@ -0,0 +1,14 @@ +/* Test there is no ICE when compile. */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +short a; +char b; + +void +test (int e[][1][1], char f[][1][1][1][1]) { + for (int g; b;) +for (;;) + for (int h; h < 4073709551572ULL; h += 18446744073709551612U) +a = f[2][2][1][4073709551612][1] << e[1][1][g]; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c new file mode 100644 index 000..02f2de66eff --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116280-2.c @@ -0,0 +1,10 @@ +/* Test there is no ICE when compile. */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +void +test (short *restrict dst, char *restrict a, int *restrict b, int n) +{ + for (int i = 0; i < n; i++) +dst[i] = a[i] << b[i]; +} -- 2.43.0
[PATCH v1] RISC-V: Make sure high bits of usadd operands are clean for HI/QI [PR116278]
From: Pan Li For QI/HImode of .SAT_ADD, the operands may be sign-extended and the high bits of Xmode may be all 1 which is not expected. For example as below code. signed char b[1]; unsigned short c; signed char *d = b; int main() { b[0] = -40; c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9; __builtin_printf("%d\n", c); } After expanding we have: ;; _6 = .SAT_ADD (_3, 9); (insn 8 7 9 (set (reg:DI 143) (high:DI (symbol_ref:DI ("d") [flags 0x86] ))) (nil)) (insn 9 8 10 (set (reg/f:DI 142) (mem/f/c:DI (lo_sum:DI (reg:DI 143) (symbol_ref:DI ("d") [flags 0x86] )) [1 d+0 S8 A64])) (nil)) (insn 10 9 11 (set (reg:HI 144 [ _3 ]) (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) "test.c":7:10 -1 (nil)) The convert from signed char to unsigned short will have sign_extend rtl as above. And finally become the lb insn as below: lb a1,0(a5) // a1 is -40, aka 0xffd8 lui a0,0x1a addia5,a1,9 sllia5,a5,0x30 srlia5,a5,0x30 // a5 is 65505 sltua1,a5,a1 // compare 65505 and 0xffd8 => TRUE The sltu try to compare 65505 and 0xffd8 here, but we actually want to compare 65505 and 65496 (0xffd8). Thus we need to clean up the high bits to ensure this. The below test suites are passed for this patch: * The rv64gcv fully regression test. PR target/116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_cleanup_rtx_high): Add new func impl to cleanup high bits of rtx. (riscv_expand_usadd): Leverage above func to cleanup operands and sum. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add-1.c: Adjust asm check. * gcc.target/riscv/sat_u_add-10.c: Ditto. * gcc.target/riscv/sat_u_add-13.c: Ditto. * gcc.target/riscv/sat_u_add-14.c: Ditto. * gcc.target/riscv/sat_u_add-17.c: Ditto. * gcc.target/riscv/sat_u_add-18.c: Ditto. * gcc.target/riscv/sat_u_add-2.c: Ditto. * gcc.target/riscv/sat_u_add-21.c: Ditto. * gcc.target/riscv/sat_u_add-22.c: Ditto. * gcc.target/riscv/sat_u_add-5.c: Ditto. * gcc.target/riscv/sat_u_add-6.c: Ditto. 
* gcc.target/riscv/sat_u_add-9.c: Ditto. * gcc.target/riscv/sat_u_add_imm-1.c: Ditto. * gcc.target/riscv/sat_u_add_imm-10.c: Ditto. * gcc.target/riscv/sat_u_add_imm-13.c: Ditto. * gcc.target/riscv/sat_u_add_imm-14.c: Ditto. * gcc.target/riscv/sat_u_add_imm-2.c: Ditto. * gcc.target/riscv/sat_u_add_imm-5.c: Ditto. * gcc.target/riscv/sat_u_add_imm-6.c: Ditto. * gcc.target/riscv/sat_u_add_imm-9.c: Ditto. * gcc.target/riscv/pr116278-run-1.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 30 ++- .../gcc.target/riscv/pr116278-run-1.c | 16 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-1.c | 1 + gcc/testsuite/gcc.target/riscv/sat_u_add-10.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-13.c | 1 + gcc/testsuite/gcc.target/riscv/sat_u_add-14.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-17.c | 1 + gcc/testsuite/gcc.target/riscv/sat_u_add-18.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-2.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-21.c | 1 + gcc/testsuite/gcc.target/riscv/sat_u_add-22.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-5.c | 1 + gcc/testsuite/gcc.target/riscv/sat_u_add-6.c | 2 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-9.c | 1 + .../gcc.target/riscv/sat_u_add_imm-1.c| 1 + .../gcc.target/riscv/sat_u_add_imm-10.c | 2 ++ .../gcc.target/riscv/sat_u_add_imm-13.c | 1 + .../gcc.target/riscv/sat_u_add_imm-14.c | 2 ++ .../gcc.target/riscv/sat_u_add_imm-2.c| 2 ++ .../gcc.target/riscv/sat_u_add_imm-5.c| 1 + .../gcc.target/riscv/sat_u_add_imm-6.c| 2 ++ .../gcc.target/riscv/sat_u_add_imm-9.c| 1 + 22 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 5fe4273beb7..fb916217e5e 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11564,6 +11564,24 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Cleanup the high bits of the RTX x and reserve the low 
bits. + The reserved bitsize comes from the bitsize of reserved_mode. */ + +static void +riscv_cleanup_rtx_high (rtx x, machine_mode reserved_mode) +{ + machine_mode mode = GET_MODE (x); + int reserved_bitsize = GET_MODE_BITSIZE (reserved_mode).to_constant (); + int mode_bitsize = GET_MODE_BITSIZE (mode).to_constant (); + + gcc_assert (mode_bitsize >= reserved_bitsize); + + int shift_bitsize = mode_bitsize - reserved_bitsize; + + riscv_emit_binary (ASHIFT, x, x, GEN_INT (shift_bitsize)); + riscv_emit
[PATCH v2] Match: Support form 1 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 1 of the scalar signed integer .SAT_ADD. Aka below example: Form 1: #define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = x + y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t sum; 8 │ long int _1; 9 │ long int _2; 10 │ int64_t _3; 11 │ _Bool _8; 12 │ long int _9; 13 │ long int _10; 14 │ long int _11; 15 │ long int _12; 16 │ long int _13; 17 │ 18 │[local count: 1073741824]: 19 │ sum_6 = x_4(D) + y_5(D); 20 │ _1 = x_4(D) ^ y_5(D); 21 │ _2 = x_4(D) ^ sum_6; 22 │ _12 = ~_1; 23 │ _13 = _2 & _12; 24 │ if (_13 < 0) 25 │ goto ; [41.00%] 26 │ else 27 │ goto ; [59.00%] 28 │ 29 │[local count: 259738147]: 30 │ _8 = x_4(D) < 0; 31 │ _9 = (long int) _8; 32 │ _10 = -_9; 33 │ _11 = _10 ^ 9223372036854775807; 34 │ 35 │[local count: 1073741824]: 36 │ # _3 = PHI 37 │ return _3; 38 │ 39 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t _4; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] 12 │ return _4; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the matching for signed .SAT_ADD. * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new matching func decl. (match_unsigned_saturation_add): Try signed .SAT_ADD and rename to ... (match_saturation_add): ... here. (math_opts_dom_walker::after_dom_children): Update the above renamed func from caller. 
Signed-off-by: Pan Li --- gcc/match.pd | 17 gcc/tree-ssa-math-opts.cc | 42 ++- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..8b8a5dbcfe3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) +/* Signed saturation add, case 1: + T sum = X + Y; + SAT_S_ADD = (X ^ Y) < 0 + ? sum + : (sum ^ x) >= 0 + ? sum + : x < 0 ? MIN : MAX; */ +(match (signed_integer_sat_add @0 @1) + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0) +(convert? @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 8d96a4c964b..f39c88741a4 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); + static void build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, tree lhs, tree op_0, tree op_1) @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) } /* - * Try to match saturation unsigned add with PHI. + * Try to match saturation add with PHI. 
+ * For unsigned integer: *: * _1 = x_3(D) + y_4(D); * if (_1 >= x_3(D)) @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) * # _2 = PHI <255(2), _1(3)> * => *[local count: 1073741824]: - * _2 = .SAT_ADD (x_4(D), y_5(D)); */ + * _2 = .SAT_ADD (x_4(D), y_5(D)); +
[PATCH v2] Vect: Make sure the lhs type of .SAT_TRUNC has its mode precision [PR116202]
From: Pan Li The .SAT_TRUNC vect pattern recognition is valid only when the lhs type has its mode precision. For example, as below, QImode with 1 bit precision like _Bool is invalid here. g_12 = (long unsigned int) _2; _13 = MIN_EXPR <g_12, 1>; _3 = (_Bool) _13; The above pattern cannot be recognized as .SAT_TRUNC (g_12) because the dest has only 1 bit of precision in QImode. That is, the type doesn't have the mode precision. The below tests passed for this patch. 1. The rv64gcv full regression tests. 2. The x86 bootstrap tests. 3. The x86 full regression tests. PR target/116202 gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_sat_trunc_pattern): Add the type_has_mode_precision_p check for the lhs type. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr116202-run-1.c: New test. Signed-off-by: Pan Li --- .../riscv/rvv/base/pr116202-run-1.c | 24 +++ gcc/tree-vect-patterns.cc | 5 ++-- 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c new file mode 100644 index 000..d150f20b5d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -fdump-rtl-expand-details" } */ + +int b[24]; +_Bool c[24]; + +int main() { + for (int f = 0; f < 4; ++f) +b[f] = 6; + + for (int f = 0; f < 24; f += 4) +c[f] = ({ + int g = ({ +unsigned long g = -b[f]; +1 < g ? 
1 : g; + }); + g; +}); + + if (c[0] != 1) +__builtin_abort (); +} + +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 4674a16d15f..74f80587b0e 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4695,11 +4695,12 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, tree ops[1]; tree lhs = gimple_assign_lhs (last_stmt); + tree otype = TREE_TYPE (lhs); - if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)) + if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL) + && type_has_mode_precision_p (otype)) { tree itype = TREE_TYPE (ops[0]); - tree otype = TREE_TYPE (lhs); tree v_itype = get_vectype_for_scalar_type (vinfo, itype); tree v_otype = get_vectype_for_scalar_type (vinfo, otype); internal_fn fn = IFN_SAT_TRUNC; -- 2.43.0
[PATCH v1] RISC-V: Update .SAT_TRUNC dump check due to middle-end change
From: Pan Li Due to a recent middle-end change, update the .SAT_TRUNC expand dump check count from 2 to 4. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Adjust the expand dump check count from 2 to 4. Signed-off-by: Pan Li --- .../gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c index 7f047f3f6a2..ae3e44cd57e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -16,4 +16,4 @@ */ DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) -/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */ -- 2.43.0
[PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
From: Pan Li This patch would like to support the form 1 of the scalar signed integer .SAT_ADD. Aka below example: Form 1: #define DEF_SAT_S_ADD_FMT_1(T) \ T __attribute__((noinline))\ sat_s_add_##T##_fmt_1 (T x, T y) \ { \ T min = (T)1u << (sizeof (T) * 8 - 1); \ T max = min - 1; \ return (x ^ y) < 0 \ ? (T)(x + y) \ : ((T)(x + y) ^ x) >= 0\ ? (T)(x + y) \ : x < 0 ? min : max; \ } DEF_SAT_S_ADD_FMT_1 (int64_t) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ long int _1; 8 │ long int _2; 9 │ long int _3; 10 │ int64_t _4; 11 │ long int _7; 12 │ _Bool _9; 13 │ long int _10; 14 │ long int _11; 15 │ long int _12; 16 │ long int _13; 17 │ 18 │ ;; basic block 2, loop depth 0 19 │ ;;pred: ENTRY 20 │ _1 = x_5(D) ^ y_6(D); 21 │ _13 = x_5(D) + y_6(D); 22 │ _3 = x_5(D) ^ _13; 23 │ _2 = ~_1; 24 │ _7 = _2 & _3; 25 │ if (_7 >= 0) 26 │ goto ; [59.00%] 27 │ else 28 │ goto ; [41.00%] 29 │ ;;succ: 4 30 │ ;;3 31 │ 32 │ ;; basic block 3, loop depth 0 33 │ ;;pred: 2 34 │ _9 = x_5(D) < 0; 35 │ _10 = (long int) _9; 36 │ _11 = -_10; 37 │ _12 = _11 ^ 9223372036854775807; 38 │ ;;succ: 4 39 │ 40 │ ;; basic block 4, loop depth 0 41 │ ;;pred: 2 42 │ ;;3 43 │ # _4 = PHI <_13(2), _12(3)> 44 │ return _4; 45 │ ;;succ: EXIT 46 │ 47 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t _4; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] 12 │ return _4; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the matching for signed .SAT_ADD. * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new matching func decl. 
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename to ... (match_saturation_add): ... here. (math_opts_dom_walker::after_dom_children): Update the above renamed func from caller. Signed-off-by: Pan Li --- gcc/match.pd | 14 + gcc/tree-ssa-math-opts.cc | 42 ++- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..0a2ffc733d3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) +/* Signed saturation add, case 1: + T min = (T)1u << (sizeof (T) * 8 - 1); + T max = min - 1; + SAT_S_ADD = (X ^ Y) < 0 + ? (X + Y) + : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1))) + integer_zerop) + (convert? (plus@2 (convert1? @0) (convert1? @1))) + (bit_xor (negate (convert (lt @0 integer_zerop))) max_value)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 8d96a4c964b..d5c9b475f72 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); + static void build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, tree lhs, tree op_0, tree op_1) @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) } /* - * Try to match saturation unsigned add with PHI. 
+ * Try to match saturation add with PHI. + * For unsigned integer: *: * _1 = x_3(D) + y_4(D); * if (_1 >= x_3(D))
[PATCH v2] RISC-V: Support IMM for operand 0 of ussub pattern
From: Pan Li This patch would like to allow IMM for the operand 0 of ussub pattern. Aka .SAT_SUB(1023, y) as the below example. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023) Before this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ bgtua0,a5,.L3 13 │ sub a0,a5,a0 14 │ ret 15 │ .L3: 16 │ li a0,0 17 │ ret After this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ sltua4,a5,a0 13 │ addia4,a4,-1 14 │ sub a0,a5,a0 15 │ and a0,a4,a0 16 │ ret The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new func impl to gen xmode rtx reg from operand rtx. (riscv_expand_ussub): Gen xmode reg for operand 1. * config/riscv/riscv.md: Allow const_int for operand 1. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_u_sub_imm-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-4.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-4.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 51 - gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_1.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_2.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-3_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-3_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++ .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 17 files changed, 482 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index b19d56149e7..5e4e9722729 100644 --- a/gcc/config/riscv/riscv.cc +++ 
b/gcc/config/riscv/riscv.cc @@ -11612,6 +11612,55 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Generate a REG rtx of Xmode from the given rtx and mode. + The rtx x can be REG (QI/HI/SI/DI) or const_int. + The machine_mode mode is the original mode from define pattern. + + If rtx is REG, the gen_lowpart of Xmode will be returned. + + If rtx is const_int, a new REG rtx will be created to hold the value of + const_int and then returned. + + According to the gccint doc, the constants generated for modes with fewer + bits than in HOST_WIDE_IN
[PATCH v1] Match: Add type_has_mode_precision_p check for SAT_TRUNC [PR116202]
From: Pan Li The .SAT_TRUNC matching can only be performed when the type has its mode precision. g_12 = (long unsigned int) _2; _13 = MIN_EXPR <g_12, 1>; _3 = (_Bool) _13; The above pattern cannot be recognized as .SAT_TRUNC (g_12) because the dest has only 1 bit of precision while its mode is QImode. That is, the type doesn't have the mode precision. Thus, add the type_has_mode_precision_p check for the dest to avoid such a case. The below tests passed for this patch. 1. The rv64gcv full regression tests. 2. The x86 bootstrap tests. 3. The x86 full regression tests. PR target/116202 gcc/ChangeLog: * match.pd: Add type_has_mode_precision_p for the dest type of the .SAT_TRUNC matching. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr116202-run-1.c: New test. Signed-off-by: Pan Li --- gcc/match.pd | 6 +++-- .../riscv/rvv/base/pr116202-run-1.c | 24 +++ 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..dfa0bba3908 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3283,7 +3283,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); wide_int int_cst = wi::to_wide (@1, itype_precision); } - (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) + (if (type_has_mode_precision_p (type) && otype_precision < itype_precision + && wi::eq_p (trunc_max, int_cst)) /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)). */ @@ -3309,7 +3310,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); wide_int int_cst = wi::to_wide (@1, itype_precision); } - (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) + (if (type_has_mode_precision_p (type) && otype_precision < itype_precision + && wi::eq_p (trunc_max, int_cst)) /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . 
*/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c new file mode 100644 index 000..d150f20b5d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -fdump-rtl-expand-details" } */ + +int b[24]; +_Bool c[24]; + +int main() { + for (int f = 0; f < 4; ++f) +b[f] = 6; + + for (int f = 0; f < 24; f += 4) +c[f] = ({ + int g = ({ +unsigned long g = -b[f]; +1 < g ? 1 : g; + }); + g; +}); + + if (c[0] != 1) +__builtin_abort (); +} + +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ -- 2.43.0
[PATCH v1] RISC-V: Support IMM for operand 0 of ussub pattern
From: Pan Li This patch would like to allow IMM for the operand 0 of ussub pattern. Aka .SAT_SUB(1023, y) as the below example. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023) Before this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ bgtua0,a5,.L3 13 │ sub a0,a5,a0 14 │ ret 15 │ .L3: 16 │ li a0,0 17 │ ret After this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ sltua4,a5,a0 13 │ addia4,a4,-1 14 │ sub a0,a5,a0 15 │ and a0,a4,a0 16 │ ret The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new func impl to gen xmode rtx reg. (riscv_expand_ussub): Gen xmode reg for operand 1. * config/riscv/riscv.md: Allow const_int for operand 1. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: * gcc.target/riscv/sat_u_sub_imm-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-4.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-4.c: New test. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 45 ++- gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 .../gcc.target/riscv/sat_u_sub_imm-1.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_1.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-1_2.c | 20 +++ .../gcc.target/riscv/sat_u_sub_imm-2.c| 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-2_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-3.c| 20 +++ .../gcc.target/riscv/sat_u_sub_imm-3_1.c | 21 +++ .../gcc.target/riscv/sat_u_sub_imm-3_2.c | 22 .../gcc.target/riscv/sat_u_sub_imm-4.c| 19 +++ .../gcc.target/riscv/sat_u_sub_imm-run-1.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-2.c| 56 +++ .../gcc.target/riscv/sat_u_sub_imm-run-3.c| 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-4.c| 48 17 files changed, 476 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-4.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index b19d56149e7..90d95944ba4 100644 --- a/gcc/config/riscv/riscv.cc +++ 
b/gcc/config/riscv/riscv.cc @@ -11612,6 +11612,49 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* According to the gccint doc, the Constants generated for modes with fewer + bits than in HOST_WIDE_INT must be sign extended to full width. Thus we + may have some problem here when expanding unsigned pattern like ussub. + + There are 2 cases here. Take .SAT_SUB (imm, y) as example. + + 1. Case 1: .SAT_SUB (127, y) for QImode. + The imm will be (const_int 127) after expand_expr_real_1, thus we + can just move the (const_int 127) to Xmode reg without any other insn. + + 2. Case
[PATCH v1] RISC-V: Take Xmode instead of Pmode for ussub expanding
From: Pan Li Pmode is designed for pointers, so use Xmode instead when expanding ussub. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_ussub): Promote to Xmode instead of Pmode. Signed-off-by: Pan Li --- gcc/config/riscv/riscv.cc | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index a490b9598b0..8ece7859945 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11620,26 +11620,26 @@ void riscv_expand_ussub (rtx dest, rtx x, rtx y) { machine_mode mode = GET_MODE (dest); - rtx pmode_x = gen_lowpart (Pmode, x); - rtx pmode_y = gen_lowpart (Pmode, y); - rtx pmode_lt = gen_reg_rtx (Pmode); - rtx pmode_minus = gen_reg_rtx (Pmode); - rtx pmode_dest = gen_reg_rtx (Pmode); + rtx xmode_x = gen_lowpart (Xmode, x); + rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_lt = gen_reg_rtx (Xmode); + rtx xmode_minus = gen_reg_rtx (Xmode); + rtx xmode_dest = gen_reg_rtx (Xmode); /* Step-1: minus = x - y */ - riscv_emit_binary (MINUS, pmode_minus, pmode_x, pmode_y); + riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y); /* Step-2: lt = x < y */ - riscv_emit_binary (LTU, pmode_lt, pmode_x, pmode_y); + riscv_emit_binary (LTU, xmode_lt, xmode_x, xmode_y); /* Step-3: lt = lt - 1 (lt + (-1)) */ - riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode)); + riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode)); - /* Step-4: pmode_dest = minus & lt */ - riscv_emit_binary (AND, pmode_dest, pmode_lt, pmode_minus); + /* Step-4: xmode_dest = minus & lt */ + riscv_emit_binary (AND, xmode_dest, xmode_lt, xmode_minus); - /* Step-5: dest = pmode_dest */ - emit_move_insn (dest, gen_lowpart (mode, pmode_dest)); + /* Step-5: dest = xmode_dest */ + emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } /* Implement the unsigned saturation truncation for int mode. -- 2.34.1
[PATCH v2] Internal-fn: Handle vector bool type for type strict match mode [PR116103]
From: Pan Li For some targets, such as amdgcn-amdhsa, we need to handle vector bool types before general vector mode types. Otherwise we get asm check failures like the ones below. gcc.target/gcn/cond_smax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 80 gcc.target/gcn/cond_smin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 80 gcc.target/gcn/cond_umax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 56 gcc.target/gcn/cond_umin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 56 gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump-not ivopts "zero if " The below test suites are passed for this patch. 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. 4. The amdgcn test case as above. gcc/ChangeLog: * internal-fn.cc (type_strictly_matches_mode_p): Add handling for vector bool type. Signed-off-by: Pan Li --- gcc/internal-fn.cc | 10 ++ 1 file changed, 10 insertions(+) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 8a2e07f2f96..966594a52ed 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4171,6 +4171,16 @@ direct_internal_fn_optab (internal_fn fn) static bool type_strictly_matches_mode_p (const_tree type) { + /* The masked vector operations have both vector data operands and vector + boolean operands. The vector data operands are expected to have a vector + mode, but the vector boolean operands can be an integer mode rather than + a vector mode, depending on how TARGET_VECTORIZE_GET_MASK_MODE is + defined. PR116103. */ + if (VECTOR_BOOLEAN_TYPE_P (type) + && SCALAR_INT_MODE_P (TYPE_MODE (type)) + && TYPE_PRECISION (TREE_TYPE (type)) == 1) +return true; + if (VECTOR_TYPE_P (type)) return VECTOR_MODE_P (TYPE_MODE (type)); -- 2.34.1
[PATCH v1] Internal-fn: Handle vector bool type for type strict match mode [PR116103]
From: Pan Li For some targets, such as amdgcn-amdhsa, we need to handle vector bool types before general vector mode types. Otherwise we get asm check failures like the ones below. gcc.target/gcn/cond_smax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 80 gcc.target/gcn/cond_smin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 80 gcc.target/gcn/cond_umax_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 56 gcc.target/gcn/cond_umin_1.c scan-assembler-times \\tv_cmp_gt_i32\\tvcc, s[0-9]+, v[0-9]+ 56 gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump-not ivopts "zero if " The below test suites are passed for this patch. 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. 4. The amdgcn test case as above. gcc/ChangeLog: * internal-fn.cc (type_strictly_matches_mode_p): Add handling for vector bool type. Signed-off-by: Pan Li --- gcc/internal-fn.cc | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 8a2e07f2f96..086c8be398a 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4171,6 +4171,12 @@ direct_internal_fn_optab (internal_fn fn) static bool type_strictly_matches_mode_p (const_tree type) { + /* For target=amdgcn-amdhsa, we need to take care of vector bool types. + More details see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116103. */ + if (VECTOR_BOOLEAN_TYPE_P (type) && SCALAR_INT_MODE_P (TYPE_MODE (type)) +&& TYPE_PRECISION (TREE_TYPE (type)) == 1) +return true; + if (VECTOR_TYPE_P (type)) return VECTOR_MODE_P (TYPE_MODE (type)); -- 2.34.1
[PATCH v1] Widening-Mul: Try .SAT_SUB for PLUS_EXPR when one op is IMM
From: Pan Li After adding the matching for .SAT_SUB when one op is IMM, there will be a new root PLUS_EXPR for the .SAT_SUB pattern. For example, Form 3: #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_3 (T x) \ { \ return x >= IMM ? x - IMM : 0;\ } DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 11) And then we will have the gimple below before widening-mul. Thus, try .SAT_SUB for the PLUS_EXPR as well. 4 │ __attribute__((noinline)) 5 │ uint64_t sat_u_sub_imm11_uint64_t_fmt_3 (uint64_t x) 6 │ { 7 │ long unsigned int _1; 8 │ uint64_t _3; 9 │ 10 │ <bb 2> [local count: 1073741824]: 11 │ _1 = MAX_EXPR <x_2(D), 11>; 12 │ _3 = _1 + 18446744073709551605; 13 │ return _3; 14 │ 15 │ } The below test suites are passed for this patch. 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children): Try .SAT_SUB for PLUS_EXPR case. Signed-off-by: Pan Li --- gcc/tree-ssa-math-opts.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index ac86be8eb94..8d96a4c964b 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -6129,6 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) case PLUS_EXPR: match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); /* fall-through */ case MINUS_EXPR: if (!convert_plusminus_to_widen (&gsi, stmt, code)) -- 2.34.1
[PATCH v1] Match: Support .SAT_SUB with IMM op for form 1-4
From: Pan Li This patch would like to support .SAT_SUB when one of the ops is IMM. Aka the forms 1-4 below. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return IMM >= y ? IMM - y : 0;\ } Form 2: #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_2 (T y) \ { \ return IMM > y ? IMM - y : 0; \ } Form 3: #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_3 (T x) \ { \ return x >= IMM ? x - IMM : 0;\ } Form 4: #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ { \ return x > IMM ? x - IMM : 0; \ } Take form 1 below as an example: DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 11) Before this patch: 4 │ __attribute__((noinline)) 5 │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y) 6 │ { 7 │ uint64_t _1; 8 │ uint64_t _3; 9 │ 10 │ ;; basic block 2, loop depth 0 11 │ ;;pred: ENTRY 12 │ if (y_2(D) <= 11) 13 │ goto <bb 3>; [50.00%] 14 │ else 15 │ goto <bb 4>; [50.00%] 16 │ ;;succ: 3 17 │ ;;4 18 │ 19 │ ;; basic block 3, loop depth 0 20 │ ;;pred: 2 21 │ _3 = 11 - y_2(D); 22 │ ;;succ: 4 23 │ 24 │ ;; basic block 4, loop depth 0 25 │ ;;pred: 2 26 │ ;;3 27 │ # _1 = PHI <0(2), _3(3)> 28 │ return _1; 29 │ ;;succ: EXIT 30 │ 31 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y) 6 │ { 7 │ uint64_t _1; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _1 = .SAT_SUB (11, y_2(D)); [tail call] 12 │ return _1; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * match.pd: Add case 9 and case 10 for .SAT_SUB when one of the ops is IMM. 
Signed-off-by: Pan Li --- gcc/match.pd | 35 +++ 1 file changed, 35 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index cf359b0ec0f..b2e7d61790d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3234,6 +3234,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Unsigned saturation sub with op_0 imm, case 9 (branch with gt): + SAT_U_SUB = IMM > Y ? (IMM - Y) : 0. + = IMM >= Y ? (IMM - Y) : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @1)) + (with + { + unsigned precision = TYPE_PRECISION (type); + wide_int max = wi::mask (precision, false, precision); + wide_int c0 = wi::to_wide (@0); + wide_int c2 = wi::to_wide (@2); + wide_int c2_add_1 = wi::add (c2, wi::uhwi (1, precision)); + bool equal_p = wi::eq_p (c0, c2); + bool less_than_1_p = !wi::eq_p (c2, max) && wi::eq_p (c2_add_1, c0); + } + (if (equal_p || less_than_1_p) + +/* Unsigned saturation sub with op_1 imm, case 10: + SAT_U_SUB = X > IMM ? (X - IMM) : 0. + = X >= IMM ? (X - IMM) : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (plus (max @0 INTEGER_CST@1) INTEGER_CST@2) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @1)) + (with + { + unsigned precision = TYPE_PRECISION (type); + wide_int c1 = wi::to_wide (@1); + wide_int c2 = wi::to_wide (@2); + wide_int sum = wi::add (c1, c2); + } + (if (wi::eq_p (sum, wi::uhwi (0, precision))) + /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ (match (unsigned_integer_sat_trunc @0) -- 2.34.1
[PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
From: Pan Li This patch would like to implement the quad and oct .SAT_TRUNC pattern in the riscv backend. Aka: Form 1: #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t) Before this patch: 4 │ __attribute__((noinline)) 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x) 6 │ { 7 │ _Bool overflow; 8 │ short unsigned int _1; 9 │ short unsigned int _2; 10 │ short unsigned int _3; 11 │ uint16_t _6; 12 │ 13 │ ;; basic block 2, loop depth 0 14 │ ;;pred: ENTRY 15 │ overflow_5 = x_4(D) > 65535; 16 │ _1 = (short unsigned int) x_4(D); 17 │ _2 = (short unsigned int) overflow_5; 18 │ _3 = -_2; 19 │ _6 = _1 | _3; 20 │ return _6; 21 │ ;;succ: EXIT 22 │ 23 │ } After this patch: 3 │ 4 │ __attribute__((noinline)) 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x) 6 │ { 7 │ uint16_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below tests suites are passed for this patch 1. The rv64gcv fully regression test. 2. The rv64gcv build with glibc gcc/ChangeLog: * config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for quad truncation. (ANYI_OCT_TRUNC): New iterator for oct truncation. (ANYI_QUAD_TRUNCATED): New attr for truncated quad modes. (ANYI_OCT_TRUNCATED): New attr for truncated oct modes. (anyi_quad_truncated): Ditto but for lower case. (anyi_oct_truncated): Ditto but for lower case. * config/riscv/riscv.md (ustrunc2): Add new pattern for quad truncation. (ustrunc2): Ditto but for oct. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust the expand dump check times. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto. * gcc.target/riscv/sat_arith_data.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-4.c: New test. 
* gcc.target/riscv/sat_u_trunc-5.c: New test. * gcc.target/riscv/sat_u_trunc-6.c: New test. * gcc.target/riscv/sat_u_trunc-run-4.c: New test. * gcc.target/riscv/sat_u_trunc-run-5.c: New test. * gcc.target/riscv/sat_u_trunc-run-6.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/iterators.md | 20 gcc/config/riscv/riscv.md | 20 .../rvv/autovec/unop/vec_sat_u_trunc-2.c | 2 +- .../rvv/autovec/unop/vec_sat_u_trunc-3.c | 2 +- .../gcc.target/riscv/sat_arith_data.h | 51 +++ .../gcc.target/riscv/sat_u_trunc-4.c | 17 +++ .../gcc.target/riscv/sat_u_trunc-5.c | 17 +++ .../gcc.target/riscv/sat_u_trunc-6.c | 20 .../gcc.target/riscv/sat_u_trunc-run-4.c | 16 ++ .../gcc.target/riscv/sat_u_trunc-run-5.c | 16 ++ .../gcc.target/riscv/sat_u_trunc-run-6.c | 16 ++ 11 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 734da041f0c..bdcdb8babc8 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")]) (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")]) +(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")]) + +(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")]) + (define_mode_attr ANYI_DOUBLE_TRUNCATED [ (HI "QI") (SI "HI") (DI "SI") ]) +(define_mode_attr ANYI_QUAD_TRUNCATED [ + (SI "QI") (DI "HI") +]) + +(define_mode_attr ANYI_OCT_TRUNCATED [ + (DI "QI") +]) + (define_mode_attr anyi_double_truncated [ (HI "qi") (SI "hi") (DI "si") ]) +(define_mode_attr anyi_quad_truncated [ 
+ (SI "qi") (DI "hi") +]) + +(define_mode_attr anyi_oct_truncated [ + (DI "qi") +]) + ;; Iterator for hardware-supported floating-point modes. (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX") (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX") diff --git a/gcc/config/riscv/riscv.m
[PATCH v1] RISC-V: Rearrange the test helper files for vector .SAT_*
From: Pan Li Rearrange the test help header files, as well as align the name conventions. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvv_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust the include file names. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: Ditto. * gcc.target/riscv/rvv/au
[PATCH v2] Internal-fn: Only allow types that match their mode for internal fn [PR115961]
From: Pan Li The direct_internal_fn_supported_p has no restrictions for the type modes. For example the bitfield like below will be recog as .SAT_TRUNC. struct e { unsigned pre : 12; unsigned a : 4; }; __attribute__((noipa)) void bug (e * v, unsigned def, unsigned use) { e & defE = *v; defE.a = min_u (use + 1, 0xf); } This patch would like to check strictly for the direct_internal_fn_supported_p, and only allows the type matches mode for ifn type tree pair. The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. PR target/115961 gcc/ChangeLog: * internal-fn.cc (type_strictly_matches_mode_p): Add new func impl to check type strictly matches mode or not. (type_pair_strictly_matches_mode_p): Ditto but for tree type pair. (direct_internal_fn_supported_p): Add above check for the tree type pair. gcc/testsuite/ChangeLog: * g++.target/i386/pr115961-run-1.C: New test. * g++.target/riscv/rvv/base/pr115961-run-1.C: New test. Signed-off-by: Pan Li --- gcc/internal-fn.cc| 32 + .../g++.target/i386/pr115961-run-1.C | 34 +++ .../riscv/rvv/base/pr115961-run-1.C | 34 +++ 3 files changed, 100 insertions(+) create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 95946bfd683..5c21249318e 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn) gcc_unreachable (); } +/* Return true if TYPE's mode has the same format as TYPE, and if there is + a 1:1 correspondence between the values that the mode can store and the + values that the type can store. 
*/ + +static bool +type_strictly_matches_mode_p (const_tree type) +{ + if (VECTOR_TYPE_P (type)) +return VECTOR_MODE_P (TYPE_MODE (type)); + + if (INTEGRAL_TYPE_P (type)) +return type_has_mode_precision_p (type); + + if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type)) +return true; + + return false; +} + +/* Return true if both the first and the second type of tree pair are + strictly matches their modes, or return false. */ + +static bool +type_pair_strictly_matches_mode_p (tree_pair type_pair) +{ + return type_strictly_matches_mode_p (type_pair.first) +&& type_strictly_matches_mode_p (type_pair.second); +} + /* Return true if FN is supported for the types in TYPES when the optimization type is OPT_TYPE. The types are those associated with the "type0" and "type1" fields of FN's direct_internal_fn_info @@ -4173,6 +4202,9 @@ bool direct_internal_fn_supported_p (internal_fn fn, tree_pair types, optimization_type opt_type) { + if (!type_pair_strictly_matches_mode_p (types)) +return false; + switch (fn) { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C b/gcc/testsuite/g++.target/i386/pr115961-run-1.C new file mode 100644 index 000..b8c8aef3b17 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C @@ -0,0 +1,34 @@ +/* PR target/115961 */ +/* { dg-do run } */ +/* { dg-options "-O3 -fdump-rtl-expand-details" } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? 
b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +} +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C new file mode 100644 index 000..b8c8aef3b17 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C @@ -0,0 +1,34 @@ +/* PR target/115961 */ +/* { dg-do run } */ +/* { dg-options "-O3 -fdump-rtl-expand-details" } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +} +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ -- 2.34.1
[PATCH v1] Internal-fn: Only allow modes that describe types for internal fn [PR115961]
From: Pan Li The direct_internal_fn_supported_p has no restrictions for the type modes. For example the bitfield like below will be recog as .SAT_TRUNC. struct e { unsigned pre : 12; unsigned a : 4; }; __attribute__((noipa)) void bug (e * v, unsigned def, unsigned use) { e & defE = *v; defE.a = min_u (use + 1, 0xf); } This patch would like to add checks for the direct_internal_fn_supported_p, and only allows the tree types describled by modes. The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. PR target/115961 gcc/ChangeLog: * internal-fn.cc (mode_describle_type_precision_p): Add new func impl to check if mode describle the tree type. (direct_internal_fn_supported_p): Add above check for the first and second tree type of tree pair. gcc/testsuite/ChangeLog: * g++.target/i386/pr115961-run-1.C: New test. * g++.target/riscv/rvv/base/pr115961-run-1.C: New test. Signed-off-by: Pan Li --- gcc/internal-fn.cc| 21 .../g++.target/i386/pr115961-run-1.C | 34 +++ .../riscv/rvv/base/pr115961-run-1.C | 34 +++ 3 files changed, 89 insertions(+) create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 95946bfd683..4dc69264a24 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4164,6 +4164,23 @@ direct_internal_fn_optab (internal_fn fn) gcc_unreachable (); } +/* Return true if the mode describes the precision of tree type, or false. 
*/ + +static bool +mode_describle_type_precision_p (const_tree type) +{ + if (VECTOR_TYPE_P (type)) +return VECTOR_MODE_P (TYPE_MODE (type)); + + if (INTEGRAL_TYPE_P (type)) +return type_has_mode_precision_p (type); + + if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type)) +return true; + + return false; +} + /* Return true if FN is supported for the types in TYPES when the optimization type is OPT_TYPE. The types are those associated with the "type0" and "type1" fields of FN's direct_internal_fn_info @@ -4173,6 +4190,10 @@ bool direct_internal_fn_supported_p (internal_fn fn, tree_pair types, optimization_type opt_type) { + if (!mode_describle_type_precision_p (types.first) +|| !mode_describle_type_precision_p (types.second)) +return false; + switch (fn) { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C b/gcc/testsuite/g++.target/i386/pr115961-run-1.C new file mode 100644 index 000..b8c8aef3b17 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C @@ -0,0 +1,34 @@ +/* PR target/115961 */ +/* { dg-do run } */ +/* { dg-options "-O3 -fdump-rtl-expand-details" } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? 
b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +} +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C new file mode 100644 index 000..b8c8aef3b17 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C @@ -0,0 +1,34 @@ +/* PR target/115961 */ +/* { dg-do run } */ +/* { dg-options "-O3 -fdump-rtl-expand-details" } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +} +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */ -- 2.34.1