Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
On Mon, May 31, 2021 at 6:14 PM Richard Biener wrote: > > On Thu, May 27, 2021 at 9:05 AM Hongtao Liu wrote: > > > > On Wed, May 26, 2021 at 8:41 PM Richard Biener > > wrote: > > > > > > On Wed, May 26, 2021 at 7:06 AM Hongtao Liu wrote: > > > > > > > > On Tue, May 25, 2021 at 6:24 PM Richard Biener > > > > wrote: > > > > > > > > > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu > > > > > wrote: > > > > > > > > > > > > Hi: > > > > > > Details described in PR. > > > > > > Bootstrapped and regtest on > > > > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ > > > > > > -march=cascadelake,-march=cascadelake} > > > > > > Ok for trunk? > > > > > > > > > > +static tree > > > > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op) > > > > > +{ > > > > > + if (!has_nop) > > > > > +return op; > > > > > + > > > > > + if (TREE_CODE (op) != SSA_NAME) > > > > > +return NULL_TREE; > > > > > + > > > > > + gimple* stmt = SSA_NAME_DEF_STMT (op); > > > > > + if (!stmt > > > > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > > > > + || gimple_has_volatile_ops (stmt) > > > > > + || gimple_assign_rhs_code (stmt) != NOP_EXPR) > > > > > +return NULL_TREE; > > > > > + > > > > > + return gimple_assign_rhs1 (stmt); > > > > > > > > > > this allows arbitrary conversions where the comment suggests you > > > > > only want to allow conversions to the same precision but different > > > > > sign. > > > > > Sth like > > > > > > > > > > gassign *stmt = safe_dyn_cast (SSA_NAME_DEF_STMT (op)); > > > > > if (!stmt > > > > > || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) > > > > > || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE > > > > > (gimple_assign_rhs1 (stmt > > > > > return NULL_TREE; > > > > > > > Changed. 
> > > > > + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) > > > > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > > > > + || gimple_has_volatile_ops (stmt)) > > > > > + return false; > > > > > > > > > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != > > > > > GIMPLE_ASSIGN > > > > > > > Changed. > > > > > the gimple_has_volatile_ops check is superfluous given you restrict > > > > > the assign code. > > > > > > > Changed. > > > > > + /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ > > > > > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) > > > > > + { > > > > > + gimple *use_stmt = USE_STMT (use_p); > > > > > + if (is_gimple_debug (use_stmt)) > > > > > + continue; > > > > > + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) > > > > > + continue; > > > > > + if (gimple_code (use_stmt) != GIMPLE_PHI) > > > > > + return false; > > > > > > > > > > can the last check be use_stmt == phi since we should have the > > > > > PHI readily available? > > > > > > > Yes, changed. > > > > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, > > > > > gimple_stmt_iterator *gsi, > > > > >rhs = fold_build2 (gimple_assign_rhs_code (reduc), > > > > > TREE_TYPE (rhs1), op0, tmp); > > > > > > > > > > + if (has_nop) > > > > > +{ > > > > > + /* Create assignment nop_rhs = op0 +/- _ifc_. */ > > > > > + tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, > > > > > "_nop_"); > > > > > + gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs); > > > > > + gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT); > > > > > + /* Rebuild rhs for nop_expr. */ > > > > > + rhs = fold_build1 (NOP_EXPR, > > > > > +TREE_TYPE (gimple_assign_lhs (nop_reduc)), > > > > > +nop_rhs); > > > > > + > > > > > + /* Delete nop_reduc. 
*/ > > > > > stmt_it = gsi_for_stmt (nop_reduc); > > > > > gsi_remove (&stmt_it, true); > > > > > release_defs (nop_reduc); > > > > > +} > > > > > + > > > > > > > > > > hmm, the whole function could be cleaned up with sth like > > > > > > > > > > /* Build rhs for unconditional increment/decrement. */ > > > > > gimple_seq stmts = NULL; > > > > > rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc), > > > > > TREE_TYPE (rhs1), op0, tmp); > > > > > if (has_nop) > > > > >rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs > > > > > (nop_reduc)), rhs); > > > > > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > > > > > > > > > plus in the caller moving the > > > > > > > > > > new_stmt = gimple_build_assign (res, rhs); > > > > > gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > > > > > > > > > > to the else branch as well as the folding done on new_stmt (maybe > > > > > return > > > > > new_stmt instead of rhs from convert_scalar_cond_reduction. > > > > Eventually, we needed to assign rhs to res, and with an extra mov stmt > > > > from rhs to res, the vectorizer failed. > > > > the only difference in
Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
On Thu, May 27, 2021 at 9:05 AM Hongtao Liu wrote: > > On Wed, May 26, 2021 at 8:41 PM Richard Biener > wrote: > > > > On Wed, May 26, 2021 at 7:06 AM Hongtao Liu wrote: > > > > > > On Tue, May 25, 2021 at 6:24 PM Richard Biener > > > wrote: > > > > > > > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu wrote: > > > > > > > > > > Hi: > > > > > Details described in PR. > > > > > Bootstrapped and regtest on > > > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ > > > > > -march=cascadelake,-march=cascadelake} > > > > > Ok for trunk? > > > > > > > > +static tree > > > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op) > > > > +{ > > > > + if (!has_nop) > > > > +return op; > > > > + > > > > + if (TREE_CODE (op) != SSA_NAME) > > > > +return NULL_TREE; > > > > + > > > > + gimple* stmt = SSA_NAME_DEF_STMT (op); > > > > + if (!stmt > > > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > > > + || gimple_has_volatile_ops (stmt) > > > > + || gimple_assign_rhs_code (stmt) != NOP_EXPR) > > > > +return NULL_TREE; > > > > + > > > > + return gimple_assign_rhs1 (stmt); > > > > > > > > this allows arbitrary conversions where the comment suggests you > > > > only want to allow conversions to the same precision but different sign. > > > > Sth like > > > > > > > > gassign *stmt = safe_dyn_cast (SSA_NAME_DEF_STMT (op)); > > > > if (!stmt > > > > || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) > > > > || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE > > > > (gimple_assign_rhs1 (stmt > > > > return NULL_TREE; > > > > > Changed. > > > > + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) > > > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > > > + || gimple_has_volatile_ops (stmt)) > > > > + return false; > > > > > > > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN > > > > > Changed. > > > > the gimple_has_volatile_ops check is superfluous given you restrict > > > > the assign code. > > > > > Changed. 
> > > > + /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ > > > > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) > > > > + { > > > > + gimple *use_stmt = USE_STMT (use_p); > > > > + if (is_gimple_debug (use_stmt)) > > > > + continue; > > > > + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) > > > > + continue; > > > > + if (gimple_code (use_stmt) != GIMPLE_PHI) > > > > + return false; > > > > > > > > can the last check be use_stmt == phi since we should have the > > > > PHI readily available? > > > > > Yes, changed. > > > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, > > > > gimple_stmt_iterator *gsi, > > > >rhs = fold_build2 (gimple_assign_rhs_code (reduc), > > > > TREE_TYPE (rhs1), op0, tmp); > > > > > > > > + if (has_nop) > > > > +{ > > > > + /* Create assignment nop_rhs = op0 +/- _ifc_. */ > > > > + tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, > > > > "_nop_"); > > > > + gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs); > > > > + gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT); > > > > + /* Rebuild rhs for nop_expr. */ > > > > + rhs = fold_build1 (NOP_EXPR, > > > > +TREE_TYPE (gimple_assign_lhs (nop_reduc)), > > > > +nop_rhs); > > > > + > > > > + /* Delete nop_reduc. */ > > > > + stmt_it = gsi_for_stmt (nop_reduc); > > > > + gsi_remove (_it, true); > > > > + release_defs (nop_reduc); > > > > +} > > > > + > > > > > > > > hmm, the whole function could be cleaned up with sth like > > > > > > > > /* Build rhs for unconditional increment/decrement. 
*/ > > > > gimple_seq stmts = NULL; > > > > rhs = gimple_build (, gimple_assing_rhs_code (reduc), > > > > TREE_TYPE (rhs1), op0, tmp); > > > > if (has_nop) > > > >rhs = gimple_convert (, TREE_TYPE (gimple_assign_lhs > > > > (nop_reduc)), rhs); > > > > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > > > > > > > plus in the caller moving the > > > > > > > > new_stmt = gimple_build_assign (res, rhs); > > > > gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > > > > > > > > to the else branch as well as the folding done on new_stmt (maybe return > > > > new_stmt instead of rhs from convert_scalar_cond_reduction. > > > Eventually, we needed to assign rhs to res, and with an extra mov stmt > > > from rhs to res, the vectorizer failed. > > > the only difference in 166t.ifcvt between successfully vectorization > > > and failed vectorization is below > > >char * _24; > > >char _25; > > >unsigned char _ifc__29; > > > + unsigned char _30; > > > > > > [local count: 118111600]: > > >if (n_10(D) != 0) > > > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n) > > >_5 = c_14(D) + _1; > >
Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > > > > > > to the else branch as well as the folding done on new_stmt (maybe return > > > new_stmt instead of rhs from convert_scalar_cond_reduction. > > Eventually, we needed to assign rhs to res, and with an extra mov stmt > > from rhs to res, the vectorizer failed. > > the only difference in 166t.ifcvt between successfully vectorization > > and failed vectorization is below > >char * _24; > >char _25; > >unsigned char _ifc__29; > > + unsigned char _30; > > > > [local count: 118111600]: > >if (n_10(D) != 0) > > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n) > >_5 = c_14(D) + _1; > >_6 = *_5; > >_ifc__29 = _3 == _6 ? 1 : 0; > > - cnt_7 = cnt_18 + _ifc__29; > > + _30 = cnt_18 + _ifc__29; > > + cnt_7 = _30; > >i_16 = i_20 + 1; > >if (n_10(D) != i_16) > > goto ; [89.00%] > > @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n) > >goto ; [100.00%] > > > > [local count: 105119324]: > > - # cnt_19 = PHI > > + # cnt_19 = PHI <_30(3), cnt_27(15)> > >_21 = (char) cnt_19; > > > > if we want to eliminate the extra move, gimple_build and > > gimple_convert is not suitable since they create a new lhs, is there > > any interface like gimple_build_assign but accept stmts? > > Hmm, OK. So what you could do is replacing all uses of 'res' > with 'rhs', alternatively replacing the last generated stmt LHS > by 'res'. Both is a bit hackish of course. Usually the vectorizer > just ignores copies like this but the reduction matching is > unnecessarily strict here (but it's also a bit awkward to fix there). > > There's redundant_ssa_names which seems to be designed to > handle propagating those out and there's the do_rpo_vn run, > so I wonder why the stmt remains. Now, for redundant_ssa_names > you'd need to push a std::pair (res, rhs) to it in case rhs is an > SSA name - does that help? Yes, it worked. > > Richard. > > > > > > Richard. 
> > > > > > > gcc/ChangeLog: > > > > > > > > PR tree-optimization/pr98365 > > > > * tree-if-conv.c (strip_nop_cond_scalar_reduction): New > > > > function. > > > > (is_cond_scalar_reduction): Handle nop_expr in cond scalar > > > > reduction. > > > > (convert_scalar_cond_reduction): Ditto. > > > > (predicate_scalar_phi): Ditto. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > PR tree-optimization/pr98365 > > > > * gcc.target/i386/pr98365.c: New test. > > > > > > > > -- > > > > BR, > > > > Hongtao > > > > > > > > -- > > BR, > > Hongtao Update patch. -- BR, Hongtao From f0593d1866af6f752e70e2ef2f1477471c252536 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 6 Jan 2021 16:33:27 +0800 Subject: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365] gcc/ChangeLog: PR tree-optimization/pr98365 * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction. (convert_scalar_cond_reduction): Ditto. (predicate_scalar_phi): Ditto. gcc/testsuite/ChangeLog: PR tree-optimization/pr98365 * gcc.target/i386/pr98365.c: New test. 
--- gcc/testsuite/gcc.target/i386/pr98365.c | 22 gcc/tree-if-conv.c | 142 +--- 2 files changed, 146 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr98365.c diff --git a/gcc/testsuite/gcc.target/i386/pr98365.c b/gcc/testsuite/gcc.target/i386/pr98365.c new file mode 100644 index 000..652210dcdd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98365.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ +short foo1 (short* a, short* c, int n) +{ + int i; + short cnt=0; + for (int i = 0;i != n; i++) +if (a[i] == c[i]) + cnt++; + return cnt; +} + +char foo2 (char* a, char* c, int n) +{ + int i; + char cnt=0; + for (int i = 0;i != n; i++) +if (a[i] == c[i]) + cnt++; + return cnt; +} diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 716eae44a21..5eed087de36 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop) return res; } +/* Return reduc_1 if has_nop. + + if (...) +
Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
On Wed, May 26, 2021 at 7:06 AM Hongtao Liu wrote: > > On Tue, May 25, 2021 at 6:24 PM Richard Biener > wrote: > > > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu wrote: > > > > > > Hi: > > > Details described in PR. > > > Bootstrapped and regtest on > > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ > > > -march=cascadelake,-march=cascadelake} > > > Ok for trunk? > > > > +static tree > > +strip_nop_cond_scalar_reduction (bool has_nop, tree op) > > +{ > > + if (!has_nop) > > +return op; > > + > > + if (TREE_CODE (op) != SSA_NAME) > > +return NULL_TREE; > > + > > + gimple* stmt = SSA_NAME_DEF_STMT (op); > > + if (!stmt > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > + || gimple_has_volatile_ops (stmt) > > + || gimple_assign_rhs_code (stmt) != NOP_EXPR) > > +return NULL_TREE; > > + > > + return gimple_assign_rhs1 (stmt); > > > > this allows arbitrary conversions where the comment suggests you > > only want to allow conversions to the same precision but different sign. > > Sth like > > > > gassign *stmt = safe_dyn_cast (SSA_NAME_DEF_STMT (op)); > > if (!stmt > > || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) > > || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE > > (gimple_assign_rhs1 (stmt > > return NULL_TREE; > > > > + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) > > + || gimple_code (stmt) != GIMPLE_ASSIGN > > + || gimple_has_volatile_ops (stmt)) > > + return false; > > > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN > > > > the gimple_has_volatile_ops check is superfluous given you restrict > > the assign code. > > > > + /* Check that R_NOP1 is used in nop_stmt or in PHI only. 
*/ > > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) > > + { > > + gimple *use_stmt = USE_STMT (use_p); > > + if (is_gimple_debug (use_stmt)) > > + continue; > > + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) > > + continue; > > + if (gimple_code (use_stmt) != GIMPLE_PHI) > > + return false; > > > > can the last check be use_stmt == phi since we should have the > > PHI readily available? > > > > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, > > gimple_stmt_iterator *gsi, > >rhs = fold_build2 (gimple_assign_rhs_code (reduc), > > TREE_TYPE (rhs1), op0, tmp); > > > > + if (has_nop) > > +{ > > + /* Create assignment nop_rhs = op0 +/- _ifc_. */ > > + tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_"); > > + gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs); > > + gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT); > > + /* Rebuild rhs for nop_expr. */ > > + rhs = fold_build1 (NOP_EXPR, > > +TREE_TYPE (gimple_assign_lhs (nop_reduc)), > > +nop_rhs); > > + > > + /* Delete nop_reduc. */ > > + stmt_it = gsi_for_stmt (nop_reduc); > > + gsi_remove (_it, true); > > + release_defs (nop_reduc); > > +} > > + > > > > hmm, the whole function could be cleaned up with sth like > > > > /* Build rhs for unconditional increment/decrement. */ > > gimple_seq stmts = NULL; > > rhs = gimple_build (, gimple_assing_rhs_code (reduc), > > TREE_TYPE (rhs1), op0, tmp); > > if (has_nop) > >rhs = gimple_convert (, TREE_TYPE (gimple_assign_lhs > > (nop_reduc)), rhs); > > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > > > plus in the caller moving the > > > > new_stmt = gimple_build_assign (res, rhs); > > gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > > > > to the else branch as well as the folding done on new_stmt (maybe return > > new_stmt instead of rhs from convert_scalar_cond_reduction. > Eventually, we needed to assign rhs to res, and with an extra mov stmt > from rhs to res, the vectorizer failed. 
> the only difference in 166t.ifcvt between successfully vectorization > and failed vectorization is below >char * _24; >char _25; >unsigned char _ifc__29; > + unsigned char _30; > > [local count: 118111600]: >if (n_10(D) != 0) > @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n) >_5 = c_14(D) + _1; >_6 = *_5; >_ifc__29 = _3 == _6 ? 1 : 0; > - cnt_7 = cnt_18 + _ifc__29; > + _30 = cnt_18 + _ifc__29; > + cnt_7 = _30; >i_16 = i_20 + 1; >if (n_10(D) != i_16) > goto ; [89.00%] > @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n) >goto ; [100.00%] > > [local count: 105119324]: > - # cnt_19 = PHI > + # cnt_19 = PHI <_30(3), cnt_27(15)> >_21 = (char) cnt_19; > > if we want to eliminate the extra move, gimple_build and > gimple_convert is not suitable since they create a new lhs, is there > any interface like gimple_build_assign but accept stmts? Hmm, OK. So what you could do is replacing all uses of 'res' with
Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
On Tue, May 25, 2021 at 6:24 PM Richard Biener wrote: > > On Mon, May 24, 2021 at 11:52 AM Hongtao Liu wrote: > > > > Hi: > > Details described in PR. > > Bootstrapped and regtest on > > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ > > -march=cascadelake,-march=cascadelake} > > Ok for trunk? > > +static tree > +strip_nop_cond_scalar_reduction (bool has_nop, tree op) > +{ > + if (!has_nop) > +return op; > + > + if (TREE_CODE (op) != SSA_NAME) > +return NULL_TREE; > + > + gimple* stmt = SSA_NAME_DEF_STMT (op); > + if (!stmt > + || gimple_code (stmt) != GIMPLE_ASSIGN > + || gimple_has_volatile_ops (stmt) > + || gimple_assign_rhs_code (stmt) != NOP_EXPR) > +return NULL_TREE; > + > + return gimple_assign_rhs1 (stmt); > > this allows arbitrary conversions where the comment suggests you > only want to allow conversions to the same precision but different sign. > Sth like > > gassign *stmt = safe_dyn_cast (SSA_NAME_DEF_STMT (op)); > if (!stmt > || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) > || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE > (gimple_assign_rhs1 (stmt > return NULL_TREE; > > + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) > + || gimple_code (stmt) != GIMPLE_ASSIGN > + || gimple_has_volatile_ops (stmt)) > + return false; > > !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN > > the gimple_has_volatile_ops check is superfluous given you restrict > the assign code. > > + /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (is_gimple_debug (use_stmt)) > + continue; > + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) > + continue; > + if (gimple_code (use_stmt) != GIMPLE_PHI) > + return false; > > can the last check be use_stmt == phi since we should have the > PHI readily available? 
> > @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, > gimple_stmt_iterator *gsi, >rhs = fold_build2 (gimple_assign_rhs_code (reduc), > TREE_TYPE (rhs1), op0, tmp); > > + if (has_nop) > +{ > + /* Create assignment nop_rhs = op0 +/- _ifc_. */ > + tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_"); > + gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs); > + gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT); > + /* Rebuild rhs for nop_expr. */ > + rhs = fold_build1 (NOP_EXPR, > +TREE_TYPE (gimple_assign_lhs (nop_reduc)), > +nop_rhs); > + > + /* Delete nop_reduc. */ > + stmt_it = gsi_for_stmt (nop_reduc); > + gsi_remove (_it, true); > + release_defs (nop_reduc); > +} > + > > hmm, the whole function could be cleaned up with sth like > > /* Build rhs for unconditional increment/decrement. */ > gimple_seq stmts = NULL; > rhs = gimple_build (, gimple_assing_rhs_code (reduc), > TREE_TYPE (rhs1), op0, tmp); > if (has_nop) >rhs = gimple_convert (, TREE_TYPE (gimple_assign_lhs > (nop_reduc)), rhs); > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > plus in the caller moving the > > new_stmt = gimple_build_assign (res, rhs); > gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > > to the else branch as well as the folding done on new_stmt (maybe return > new_stmt instead of rhs from convert_scalar_cond_reduction. Eventually, we needed to assign rhs to res, and with an extra mov stmt from rhs to res, the vectorizer failed. the only difference in 166t.ifcvt between successfully vectorization and failed vectorization is below char * _24; char _25; unsigned char _ifc__29; + unsigned char _30; [local count: 118111600]: if (n_10(D) != 0) @@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n) _5 = c_14(D) + _1; _6 = *_5; _ifc__29 = _3 == _6 ? 
1 : 0; - cnt_7 = cnt_18 + _ifc__29; + _30 = cnt_18 + _ifc__29; + cnt_7 = _30; i_16 = i_20 + 1; if (n_10(D) != i_16) goto ; [89.00%] @@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n) goto ; [100.00%] [local count: 105119324]: - # cnt_19 = PHI + # cnt_19 = PHI <_30(3), cnt_27(15)> _21 = (char) cnt_19; if we want to eliminate the extra move, gimple_build and gimple_convert is not suitable since they create a new lhs, is there any interface like gimple_build_assign but accept stmts? > > Richard. > > > gcc/ChangeLog: > > > > PR tree-optimization/pr98365 > > * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. > > (is_cond_scalar_reduction): Handle nop_expr in cond scalar > > reduction. > > (convert_scalar_cond_reduction): Ditto. > > (predicate_scalar_phi): Ditto. > > > > gcc/testsuite/ChangeLog: > > > > PR
Re: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
On Mon, May 24, 2021 at 11:52 AM Hongtao Liu wrote: > > Hi: > Details described in PR. > Bootstrapped and regtest on > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ > -march=cascadelake,-march=cascadelake} > Ok for trunk? +static tree +strip_nop_cond_scalar_reduction (bool has_nop, tree op) +{ + if (!has_nop) +return op; + + if (TREE_CODE (op) != SSA_NAME) +return NULL_TREE; + + gimple* stmt = SSA_NAME_DEF_STMT (op); + if (!stmt + || gimple_code (stmt) != GIMPLE_ASSIGN + || gimple_has_volatile_ops (stmt) + || gimple_assign_rhs_code (stmt) != NOP_EXPR) +return NULL_TREE; + + return gimple_assign_rhs1 (stmt); this allows arbitrary conversions where the comment suggests you only want to allow conversions to the same precision but different sign. Sth like gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op)); if (!stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE (gimple_assign_rhs1 (stmt)))) return NULL_TREE; + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) + || gimple_code (stmt) != GIMPLE_ASSIGN + || gimple_has_volatile_ops (stmt)) + return false; !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN the gimple_has_volatile_ops check is superfluous given you restrict the assign code. + /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) + continue; + if (gimple_code (use_stmt) != GIMPLE_PHI) + return false; can the last check be use_stmt == phi since we should have the PHI readily available? @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, rhs = fold_build2 (gimple_assign_rhs_code (reduc), TREE_TYPE (rhs1), op0, tmp); + if (has_nop) +{ + /* Create assignment nop_rhs = op0 +/- _ifc_. 
*/ + tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_"); + gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs); + gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT); + /* Rebuild rhs for nop_expr. */ + rhs = fold_build1 (NOP_EXPR, +TREE_TYPE (gimple_assign_lhs (nop_reduc)), +nop_rhs); + + /* Delete nop_reduc. */ + stmt_it = gsi_for_stmt (nop_reduc); + gsi_remove (&stmt_it, true); + release_defs (nop_reduc); +} + hmm, the whole function could be cleaned up with sth like /* Build rhs for unconditional increment/decrement. */ gimple_seq stmts = NULL; rhs = gimple_build (&stmts, gimple_assing_rhs_code (reduc), TREE_TYPE (rhs1), op0, tmp); if (has_nop) rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); plus in the caller moving the new_stmt = gimple_build_assign (res, rhs); gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); to the else branch as well as the folding done on new_stmt (maybe return new_stmt instead of rhs from convert_scalar_cond_reduction. Richard. > gcc/ChangeLog: > > PR tree-optimization/pr98365 > * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. > (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction. > (convert_scalar_cond_reduction): Ditto. > (predicate_scalar_phi): Ditto. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/pr98365 > * gcc.target/i386/pr98365.c: New test. > > -- > BR, > Hongtao
[PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365]
Hi: Details described in PR. Bootstrapped and regtest on x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\ -march=cascadelake,-march=cascadelake} Ok for trunk? gcc/ChangeLog: PR tree-optimization/pr98365 * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction. (convert_scalar_cond_reduction): Ditto. (predicate_scalar_phi): Ditto. gcc/testsuite/ChangeLog: PR tree-optimization/pr98365 * gcc.target/i386/pr98365.c: New test. -- BR, Hongtao From 0248efe310454a31e4bbeb1430f907ec663c39ae Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 6 Jan 2021 16:33:27 +0800 Subject: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365] gcc/ChangeLog: PR tree-optimization/pr98365 * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction. (convert_scalar_cond_reduction): Ditto. (predicate_scalar_phi): Ditto. gcc/testsuite/ChangeLog: PR tree-optimization/pr98365 * gcc.target/i386/pr98365.c: New test. 
--- gcc/testsuite/gcc.target/i386/pr98365.c | 22 gcc/tree-if-conv.c | 131 +--- 2 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr98365.c diff --git a/gcc/testsuite/gcc.target/i386/pr98365.c b/gcc/testsuite/gcc.target/i386/pr98365.c new file mode 100644 index 000..652210dcdd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98365.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ +short foo1 (short* a, short* c, int n) +{ + int i; + short cnt=0; + for (int i = 0;i != n; i++) +if (a[i] == c[i]) + cnt++; + return cnt; +} + +char foo2 (char* a, char* c, int n) +{ + int i; + char cnt=0; + for (int i = 0;i != n; i++) +if (a[i] == c[i]) + cnt++; + return cnt; +} diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 716eae44a21..5811eb00478 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop) return res; } +/* Return reduc_1 if has_nop. + + if (...) + tmp1 = (unsigned type) reduc_1; + tmp2 = tmp1 + rhs2; + reduc_3 = (signed type) tmp2. */ +static tree +strip_nop_cond_scalar_reduction (bool has_nop, tree op) +{ + if (!has_nop) +return op; + + if (TREE_CODE (op) != SSA_NAME) +return NULL_TREE; + + gimple* stmt = SSA_NAME_DEF_STMT (op); + if (!stmt + || gimple_code (stmt) != GIMPLE_ASSIGN + || gimple_has_volatile_ops (stmt) + || gimple_assign_rhs_code (stmt) != NOP_EXPR) +return NULL_TREE; + + return gimple_assign_rhs1 (stmt); +} + /* Returns true if def-stmt for phi argument ARG is simple increment/decrement which is in predicated basic block. 
In fact, the following PHI pattern is searching: @@ -1595,9 +1620,10 @@ if_convertible_loop_p (class loop *loop) static bool is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, - tree *op0, tree *op1, bool extended) + tree *op0, tree *op1, bool extended, bool* has_nop, + gimple **nop_reduc) { - tree lhs, r_op1, r_op2; + tree lhs, r_op1, r_op2, r_nop1, r_nop2; gimple *stmt; gimple *header_phi = NULL; enum tree_code reduction_op; @@ -1608,7 +1634,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, use_operand_p use_p; edge e; edge_iterator ei; - bool result = false; + bool result = *has_nop = false; if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME) return false; @@ -1656,18 +1682,78 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, return false; reduction_op = gimple_assign_rhs_code (stmt); + +/* Catch something like below + + loop-header: + reduc_1 = PHI <..., reduc_2> + ... + if (...) + tmp1 = (unsigned type) reduc_1; + tmp2 = tmp1 + rhs2; + reduc_3 = (signed type) tmp2; + + reduc_2 = PHI <reduc_1, reduc_3> + + and convert to + + reduc_2 = PHI <0, reduc_3> + tmp1 = (unsigned type)reduce_1; + ifcvt = cond_expr ? rhs2 : 0 + tmp2 = tmp1 +/- ifcvt; + reduce_1 = (signed type)tmp2; */ + + if (reduction_op == NOP_EXPR) +{ + lhs = gimple_assign_rhs1 (stmt); + if (TREE_CODE (lhs) != SSA_NAME + || !has_single_use (lhs)) + return false; + + *nop_reduc = stmt; + stmt = SSA_NAME_DEF_STMT (lhs); + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) + || gimple_code (stmt) != GIMPLE_ASSIGN + || gimple_has_volatile_ops (stmt)) + return false; + + *has_nop = true; + reduction_op = gimple_assign_rhs_code (stmt); +} + if