From: Pan Li <[email protected]>
This patch would like to implement the sssub for vector signed integer.
Form 1:
#define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
void __attribute__((noinline)) \
vec_sat_s_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
T x = op_1[i]; \
T y = op_2[i]; \
T minus = (UT)x - (UT)y; \
out[i] = (x ^ y) >= 0 \
? minus \
: (minus ^ x) >= 0 \
? minus \
: x < 0 ? MIN : MAX; \
} \
}
DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
Before this patch:
28 │ vle8.v v1,0(a1)
29 │ vle8.v v2,0(a2)
30 │ sub a3,a3,a5
31 │ add a1,a1,a5
32 │ add a2,a2,a5
33 │ vsra.vi v4,v1,7
34 │ vsub.vv v3,v1,v2
35 │ vxor.vv v2,v1,v2
36 │ vxor.vv v0,v1,v3
37 │ vmslt.vi v2,v2,0
38 │ vmslt.vi v0,v0,0
39 │ vmand.mm v0,v0,v2
40 │ vxor.vv v3,v4,v5,v0.t
41 │ vse8.v v3,0(a0)
42 │ add a0,a0,a5
After this patch:
25 │ vle8.v v1,0(a1)
26 │ vle8.v v2,0(a2)
27 │ sub a3,a3,a5
28 │ add a1,a1,a5
29 │ add a2,a2,a5
30 │ vssub.vv v1,v1,v2
31 │ vse8.v v1,0(a0)
32 │ add a0,a0,a5
The below test suites are passed for this patch.
* The rv64gcv fully regression test.
gcc/ChangeLog:
* config/riscv/autovec.md (sssub<mode>3): Add new pattern for
signed SAT_SUB.
* config/riscv/riscv-protos.h (expand_vec_sssub): Add new func
decl to expand sssub to vssub.
* config/riscv/riscv-v.cc (expand_vec_sssub): Add new func
impl to expand sssub to vssub.
Signed-off-by: Pan Li <[email protected]>
---
gcc/config/riscv/autovec.md | 11 +++++++++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 9 +++++++++
3 files changed, 21 insertions(+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 836cdd4491f..7dc78a48874 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2734,6 +2734,17 @@ (define_expand "ussub<mode>3"
}
)
+(define_expand "sssub<mode>3"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2],
<MODE>mode);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><v_double_trunc>2"
[(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(match_operand:VWEXTI 1 "register_operand")]
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1e6d10a1402..b2f5d72f494 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -649,6 +649,7 @@ void expand_vec_lfloor (rtx, rtx, machine_mode,
machine_mode);
void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
+void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ca3a80cceb9..fba35652cc2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4902,6 +4902,15 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2,
machine_mode vec_mode)
emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode);
}
+/* Expand the standard name ssadd<mode>3 for vector mode, we can leverage
+ the vector fixed point vector single-width saturating add directly. */
+
+void
+expand_vec_sssub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+ emit_vec_binary_alu (op_0, op_1, op_2, SS_MINUS, vec_mode);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => SI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
--
2.43.0