From: Pan Li <[email protected]>
There are several forms of the unsigned SAT_ADD. Some of them are
complicated while others are cheap. This patch simplifies one of the
complicated forms into a cheap one. For example, as below:
From form 6 (branch):
SUM = ADD_OVERFLOW (X, Y)
SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM)
To (branchless):
SAT_U_ADD = (X + Y) | - ((X + Y) < X).
#define T uint8_t
/* Example of unsigned saturating add written in form 6 (branch):
   __builtin_add_overflow stores the sum of x and y in ret and its
   result is tested against zero.  On overflow the function returns -1
   converted to T (all bits set); otherwise it returns ret.  */
T sat_add_u_1 (T x, T y)
{
T ret;
return __builtin_add_overflow (x, y, &ret) != 0 ? -1 : ret;
}
Before this patch in phiopt2:
4 │ uint8_t sat_u_add_uint8_t_13 (uint8_t x, uint8_t y)
5 │ {
6 │ unsigned char _1;
7 │ unsigned char _2;
8 │ uint8_t _3;
9 │ __complex__ unsigned char _6;
10 │
11 │ <bb 2> [local count: 1073741824]:
12 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
13 │ _1 = REALPART_EXPR <_6>;
14 │ _2 = IMAGPART_EXPR <_6>;
15 │ if (_2 != 0)
16 │ goto <bb 4>; [35.00%]
17 │ else
18 │ goto <bb 3>; [65.00%]
19 │
20 │ <bb 3> [local count: 697932184]:
21 │
22 │ <bb 4> [local count: 1073741824]:
23 │ # _3 = PHI <_1(3), 255(2)>
24 │ return _3;
25 │
26 │ }
After this patch:
14 │ uint8_t sat_u_add_uint8_t_13 (uint8_t x, uint8_t y)
15 │ {
16 │ unsigned char _1;
17 │ __complex__ unsigned char _6;
18 │ unsigned char _8;
19 │ _Bool _9;
20 │ unsigned char _10;
21 │ unsigned char _11;
22 │ unsigned char _12;
23 │
24 │ <bb 2> [local count: 1073741824]:
25 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); // Dead code
26 │ _1 = REALPART_EXPR <_6>; // Ditto
27 │ _8 = x_4(D) + y_5(D);
28 │ _9 = x_4(D) > _8;
29 │ _10 = (unsigned char) _9;
30 │ _11 = -_10;
31 │ _12 = _8 | _11;
32 │ return _12;
33 │
34 │ }
The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.
gcc/ChangeLog:
* match.pd: Remove unsigned branch form 6 for SAT_ADD, and
add simplify to branchless instead.
Signed-off-by: Pan Li <[email protected]>
---
gcc/match.pd | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 8ed08b95bc0..c360e212340 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3165,16 +3165,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
SAT_U_ADD = IMAGPART_EXPR (SUM) == 0 ? REALPART_EXPR (SUM) : -1 */
(simplify (cond (eq (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
(realpart @2) integer_minus_onep)
+ (if (types_match (type, @0, @1))
+ (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))))))
+ /* From SUM = ADD_OVERFLOW (X, Y)
+ SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM) */
+ (simplify (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+ integer_minus_onep (realpart @2))
(if (types_match (type, @0, @1))
(bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0)))))))
-/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
- SUM = ADD_OVERFLOW (X, Y)
- SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM). */
-(match (unsigned_integer_sat_add @0 @1)
- (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
- integer_minus_onep (usadd_left_part_2 @0 @1)))
-
/* Unsigned saturation add, case 9 (one op is imm):
SAT_U_ADD = (X + 3) >= x ? (X + 3) : -1. */
(match (unsigned_integer_sat_add @0 @1)
--
2.43.0