Hi,
Following is version 3 of the patch, proposed for master, aiming to fix
PR104116. It has been bootstrapped and regtested on powerpc64le with no
regressions.
Kindly review.
Thanks and regards,
Avinash Jayakar
Changes from v2:
- Corrected all formatting
- Updated the test case with a main function and removed the
powerpc-specific option
- Added null initialization for pattern_stmt and def_stmt in
vect_recog_divmod_pattern
Changes from v1:
- Added new tests for checking vectorization of FLOOR_{DIV,MOD}
for multiple paths.
- Incorporated review comments to use proper vector masks and
checks for whether the target supports the generated code.
vect: Add vectorization logic for FLOOR_{MOD,DIV}_EXPR
Add logic in the tree vectorizer for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR. As
mentioned in PR104116, the scalar logic for
FLOOR_MOD_EXPR:
r = x %[fl] y; is
r = x % y; if (r && (x ^ y) < 0) r += y;
FLOOR_DIV_EXPR:
d = x /[fl] y; is
r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
Added a new helper function "add_code_for_floor_divmod" in
tree-vect-patterns.cc that adds the compensating code for floor mod and
floor div. This function checks whether the target supports all
operations required to implement floor_{div,mod} and generates vectorized
code for the respective operation. Pseudocode of the generated code is
given below:
v0 = x^y
v1 = -r
v2 = r | -r (if r!=0, then v2 < 0)
v3 = v0 & v2
v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
if floor_mod
v5 = v4 ? y : 0
v6 = r + v5 (final result)
else if floor_div
v5 = v4 ? 1 : 0
v6 = d - v5 (final result)
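For reference, a scalar C sketch of the same transform (illustrative
only, not code added by this patch):

  /* Scalar reference semantics for the floor operations.  */
  int
  floor_mod (int x, int y)
  {
    int r = x % y;              /* truncating remainder  */
    if (r != 0 && (x ^ y) < 0)  /* signs differ and r != 0  */
      r += y;                   /* adjust toward -infinity  */
    return r;
  }

  int
  floor_div (int x, int y)
  {
    int r = x % y;
    int d = x / y;              /* truncating quotient  */
    if (r != 0 && (x ^ y) < 0)
      --d;
    return d;
  }

E.g., floor_div (7, -2) yields -4 where truncation gives -3, and
floor_mod (7, -2) yields -1 where truncation gives 1.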
Added tests to check vectorization along all paths:
1. If operand1 == 2
2. If operand1 == power of 2
3. If operand1 != power of 2
2025-09-24 Avinash Jayakar <[email protected]>
gcc/ChangeLog:
PR vect/104116
* tree-vect-patterns.cc (add_code_for_floor_divmod): New helper to
generate vectorized code for floor_{div,mod}.
(vect_recog_divmod_pattern): Add handling for FLOOR_DIV_EXPR and
FLOOR_MOD_EXPR.
gcc/testsuite/ChangeLog:
PR vect/104116
* gcc.dg/vect/pr104116-floor-divmod.c: New test.
---
.../gcc.dg/vect/pr104116-floor-divmod.c | 83 ++++++++
gcc/tree-vect-patterns.cc | 187 ++++++++++++++++--
2 files changed, 255 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
new file mode 100644
index 00000000000..878d30ee803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
@@ -0,0 +1,83 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+#define N 1024
+
+int arr[N];
+
+#include "tree-vect.h"
+
+#define TEST_FN(OP, CONST, NAME) void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (int * a) \
+{ \
+ int i; \
+ long unsigned int _1; \
+ long unsigned int _2; \
+ int * _3; \
+ int _4; \
+ int _5; \
+ unsigned int _12; \
+ unsigned int _13; \
+ \
+ __BB(2,guessed_local(10737416)): \
+ goto __BB3(precise(134217728)); \
+ \
+ __BB(3,loop_header(1),guessed_local(1063004408)): \
+ i_14 = __PHI (__BB5: i_11, __BB2: 0); \
+ _13 = __PHI (__BB5: _12, __BB2: 512u); \
+ _1 = (long unsigned int) i_14; \
+ _2 = _1 * 4ul; \
+ _3 = a_9(D) + _2; \
+ _4 = __MEM <int> (_3); \
+ _5 = _4 OP CONST; \
+ __MEM <int> (_3) = _5; \
+ i_11 = i_14 + 2; \
+ _12 = _13 - 1u; \
+ if (_12 != 0u) \
+ goto __BB5(guessed(132861994)); \
+ else \
+ goto __BB4(guessed(1355734)); \
+ \
+ __BB(5,guessed_local(1052266995)): \
+ goto __BB3(precise(134217728)); \
+ \
+ __BB(4,guessed_local(10737416)): \
+ return; \
+} \
+
+TEST_FN(%, 2, trunc_mod_2)
+TEST_FN(__FLOOR_MOD, 2, floor_mod_2)
+TEST_FN(__FLOOR_DIV, 2, floor_div_2)
+
+TEST_FN(%, 4, trunc_mod_pow2)
+TEST_FN(__FLOOR_MOD, 4, floor_mod_pow2)
+TEST_FN(__FLOOR_DIV, 4, floor_div_pow2)
+
+TEST_FN(%, 5, trunc_mod)
+TEST_FN(__FLOOR_MOD, 5, floor_mod)
+TEST_FN(__FLOOR_DIV, 5, floor_div)
+
+int main (void)
+{
+ check_vect ();
+ int * a = (int*)&arr;
+ trunc_mod_2(a);
+ floor_mod_2(a);
+ floor_div_2(a);
+
+ trunc_mod_pow2(a);
+ floor_mod_pow2(a);
+ floor_div_pow2(a);
+
+ trunc_mod(a);
+ floor_mod(a);
+ floor_div(a);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 18 "vect" } } */
+
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 782327235db..ea4a7ede425 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4837,6 +4837,122 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
return NULL;
}
+/* Function add_code_for_floor_divmod
+
+   A helper function to add compensation code for implementing
+   FLOOR_MOD_EXPR and FLOOR_DIV_EXPR.  The quotient and remainder are
+   needed to implement these operators:
+     r = x %[fl] y;   d = x /[fl] y;
+   is
+     r = x % y; if (r && (x ^ y) < 0) r += y;
+     r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
+   respectively.
+   Produce the following sequence:
+     v0 = x ^ y
+     v1 = -r
+     v2 = r | -r
+     v3 = v0 & v2
+     v4 = v3 < 0  (equivalent to (r && (x ^ y) < 0))
+     if (floor_mod)
+       v5 = v4 ? y : 0
+       v6 = r + v5  (final result)
+     if (floor_div)
+       v5 = v4 ? 1 : 0
+       v6 = d - v5  (final result)
+   Inputs:
+     VECTYPE: Vector type of the operands.
+     VINFO: Vectorization information for the pattern.
+     STMT_VINFO: Statement where the pattern begins.
+     RHS_CODE: Either FLOOR_MOD_EXPR or FLOOR_DIV_EXPR.
+     Q: The quotient of the division.
+     R: The remainder of the division.
+     OPRND0/OPRND1: The actual operands involved.
+     ITYPE: Scalar integer type of the operands.
+   Output:
+     NULL if the target does not support the required operations,
+     otherwise the final gimple statement computing the result.  */
+static gimple *
+add_code_for_floor_divmod (tree vectype, vec_info *vinfo,
+ stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
+ tree q, tree r, tree oprnd0, tree oprnd1, tree itype)
+{
+ gimple *def_stmt;
+ tree mask_vectype = truth_type_for (vectype);
+ if (!mask_vectype)
+ return NULL;
+ if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
+ || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
+ || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
+ || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+ || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+ || !expand_vec_cond_expr_p (vectype, mask_vectype))
+ return NULL;
+
+ // Build the compensation sequence; see the function comment above for
+ // the full pseudocode.
+
+ // x ^ y, sign bit is set iff x and y have opposite signs
+ tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // -r
+ tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // r | -r, sign bit is set iff r != 0
+ tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // (x^y) & (r|-r)
+ tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR, r_or_negr,
+ cond_reg);
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // ((x^y) & (r|-r)) < 0, equivalent to ((x ^ y) < 0 && r != 0)
+ tree bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+ def_stmt = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
+ build_int_cst (itype, 0));
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
+ mask_vectype, itype);
+
+ if (rhs_code == FLOOR_MOD_EXPR)
+ {
+ // (x^y < 0 && r) ? y : 0
+ tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond, oprnd1,
+ build_int_cst (itype, 0));
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // r += (x ^ y < 0 && r) ? y : 0
+ tree floor_mod_r = vect_recog_temp_ssa_var (itype, NULL);
+ return gimple_build_assign (floor_mod_r, PLUS_EXPR, r, extr_cond);
+ }
+ else if (rhs_code == FLOOR_DIV_EXPR)
+ {
+ // (x^y < 0 && r) ? 1 : 0
+ tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+ def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+ build_int_cst (itype, 1), build_int_cst (itype, 0));
+ append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+ // q -= ((x ^ y) < 0 && r) ? 1 : 0
+ tree floor_div_q = vect_recog_temp_ssa_var (itype, NULL);
+ return gimple_build_assign (floor_div_q, MINUS_EXPR, q, extr_cond);
+ }
+ else
+ return NULL;
+}
+
/* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
@@ -4881,7 +4997,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
{
gimple *last_stmt = stmt_vinfo->stmt;
tree oprnd0, oprnd1, vectype, itype, cond;
- gimple *pattern_stmt, *def_stmt;
+ gimple *pattern_stmt = NULL;
+ gimple *def_stmt = NULL;
enum tree_code rhs_code;
optab optab;
tree q, cst;
@@ -4898,6 +5015,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
case TRUNC_DIV_EXPR:
case EXACT_DIV_EXPR:
case TRUNC_MOD_EXPR:
+ case FLOOR_MOD_EXPR:
+ case FLOOR_DIV_EXPR:
break;
default:
return NULL;
@@ -4949,17 +5068,28 @@ vect_recog_divmod_pattern (vec_info *vinfo,
gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
gimple_call_set_lhs (div_stmt, var_div);
- if (rhs_code == TRUNC_MOD_EXPR)
- {
+ if (rhs_code == TRUNC_MOD_EXPR
+ || rhs_code == FLOOR_MOD_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
+ {
append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
+ tree t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- LSHIFT_EXPR, var_div, shift);
+ = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+ tree r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- MINUS_EXPR, oprnd0,
- gimple_assign_lhs (def_stmt));
+ = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+ if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+ {
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+ pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+ rhs_code, var_div, r, oprnd0, oprnd1,
+ itype);
+ if (pattern_stmt == NULL)
+ return NULL;
+ }
}
else
pattern_stmt = div_stmt;
@@ -4973,8 +5103,10 @@ vect_recog_divmod_pattern (vec_info *vinfo,
build_int_cst (itype, 0));
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
truth_type_for (vectype), itype);
+ tree div_result = NULL_TREE;
if (rhs_code == TRUNC_DIV_EXPR
- || rhs_code == EXACT_DIV_EXPR)
+ || rhs_code == EXACT_DIV_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
tree var = vect_recog_temp_ssa_var (itype, NULL);
tree shift;
@@ -4991,12 +5123,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
shift = build_int_cst (itype, tree_log2 (oprnd1));
+ div_result = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ = gimple_build_assign (div_result,
RSHIFT_EXPR, var, shift);
}
- else
+ if (rhs_code == TRUNC_MOD_EXPR
+ || rhs_code == FLOOR_MOD_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
+ if (rhs_code == FLOOR_DIV_EXPR)
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+
tree signmask;
if (compare_tree_int (oprnd1, 2) == 0)
{
@@ -5041,10 +5179,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
build_int_cst (itype, 1)));
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+ tree r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- MINUS_EXPR, gimple_assign_lhs (def_stmt),
+ = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
signmask);
+ if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+ {
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+ pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+ rhs_code, div_result, r, oprnd0, oprnd1, itype);
+ if (pattern_stmt == NULL)
+ return NULL;
+ }
}
return pattern_stmt;
@@ -5351,13 +5497,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
}
}
- if (rhs_code == TRUNC_MOD_EXPR)
+ if (rhs_code == TRUNC_MOD_EXPR
+ || rhs_code == FLOOR_MOD_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
tree r, t1;
/* We divided. Now finish by:
- t1 = q * oprnd1;
- r = oprnd0 - t1; */
+ t1 = q * oprnd1;
+ r = oprnd0 - t1; */
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -5366,6 +5514,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+ if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+ {
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+ pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+ rhs_code, q, r, oprnd0, oprnd1, itype);
+ if (pattern_stmt == NULL)
+ return NULL;
+ }
}
/* Pattern detected. */
--
2.51.0