Hi,
This is the same patch that was posted at
https://gcc.gnu.org/ml/gcc-patches/2016-05/msg02000.html,
rebased on top of this patch series. The original patch was blocked because,
without this patch series, it could generate worse code on targets with
limited addressing mode support, such as AArch64.
There was some discussion about alternative fixes for the PRs, but after
thinking it over again I believe this fix is in the right direction. A CSE
interface is useful for cleaning up code generated by the vectorizer, and we
should improve this CSE interface into a region-based one. For the moment,
optimal code is not generated on targets like x86; I believe this is because
the CSE is weak and doesn't cover all basic blocks generated by the
vectorizer. The issue should be fixed once region-based CSE is implemented.
Is it OK?
Thanks,
bin
2017-04-11 Bin Cheng <bin.ch...@arm.com>
PR tree-optimization/68030
PR tree-optimization/69710
* tree-ssa-dom.c (cse_bbs): New function.
* tree-ssa-dom.h (cse_bbs): New declaration.
* tree-vect-data-refs.c (vect_create_addr_base_for_vector_ref):
Re-associate address by splitting constant offset.
(vect_create_data_ref_ptr, vect_setup_realignment): Record changed
basic block.
* tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Record
changed basic block.
* tree-vectorizer.c (tree-ssa-dom.h): Include header file.
(changed_bbs): New variable.
(vectorize_loops): Allocate and free CHANGED_BBS. Call cse_bbs.
* tree-vectorizer.h (changed_bbs): New declaration.
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index d9e5942..6d74c07 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -1765,3 +1765,50 @@ optimize_stmt (basic_block bb, gimple_stmt_iterator si,
}
return retval;
}
+
+/* Run local CSE (via optimize_stmt) on every statement of each basic block
+ whose index is set in CHANGED_BBS, using freshly allocated tables. */
+
+void
+cse_bbs (bitmap changed_bbs)
+{
+ unsigned index;
+ bitmap_iterator bi;
+ gimple_stmt_iterator gsi;
+
+ hash_table<expr_elt_hasher> *avail_exprs;
+ class avail_exprs_stack *avail_exprs_stack;
+ class const_and_copies *const_and_copies;
+
+ avail_exprs = new hash_table<expr_elt_hasher> (1024);
+ avail_exprs_stack = new class avail_exprs_stack (avail_exprs);
+ const_and_copies = new class const_and_copies ();
+
+ threadedge_initialize_values ();
+ /* Push one marker on each unwinding stack. NOTE(review): this happens only
+ once, before the loop, yet the stacks are popped after every block. */
+ avail_exprs_stack->push_marker ();
+ const_and_copies->push_marker ();
+
+ EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
+ {
+ basic_block bb = BASIC_BLOCK_FOR_FN (cfun, index);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n\nRun local cse on block #%d\n\n", bb->index);
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ optimize_stmt (bb, gsi, const_and_copies, avail_exprs_stack);
+
+ /* Unwind both stacks after each block so that they stay small. */
+ avail_exprs_stack->pop_to_marker ();
+ const_and_copies->pop_to_marker ();
+ }
+
+ delete avail_exprs;
+ avail_exprs = NULL;
+
+ delete avail_exprs_stack;
+ delete const_and_copies;
+ threadedge_finalize_values ();
+}
diff --git a/gcc/tree-ssa-dom.h b/gcc/tree-ssa-dom.h
index ad1b7ef..88869fd 100644
--- a/gcc/tree-ssa-dom.h
+++ b/gcc/tree-ssa-dom.h
@@ -24,5 +24,6 @@ extern bool simple_iv_increment_p (gimple *);
extern void record_temporary_equivalences (edge,
class const_and_copies *,
class avail_exprs_stack *);
+extern void cse_bbs (bitmap);
#endif /* GCC_TREE_SSA_DOM_H */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index aa504b6..beffa17 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -4247,23 +4247,27 @@ vect_create_addr_base_for_vector_ref (gimple *stmt,
base_name = get_name (DR_REF (dr));
}
- /* Create base_offset */
- base_offset = size_binop (PLUS_EXPR,
- fold_convert (sizetype, base_offset),
- fold_convert (sizetype, init));
+ base_offset = fold_convert (sizetype, base_offset);
+ init = fold_convert (sizetype, init);
if (offset)
{
offset = fold_build2 (MULT_EXPR, sizetype,
fold_convert (sizetype, offset), step);
- base_offset = fold_build2 (PLUS_EXPR, sizetype,
- base_offset, offset);
+ if (TREE_CODE (offset) == INTEGER_CST)
+ init = fold_build2 (PLUS_EXPR, sizetype, init, offset);
+ else
+ base_offset = fold_build2 (PLUS_EXPR, sizetype,
+ base_offset, offset);
}
if (byte_offset)
{
byte_offset = fold_convert (sizetype, byte_offset);
- base_offset = fold_build2 (PLUS_EXPR, sizetype,
- base_offset, byte_offset);
+ if (TREE_CODE (byte_offset) == INTEGER_CST)
+ init = fold_build2 (PLUS_EXPR, sizetype, init, byte_offset);
+ else
+ base_offset = fold_build2 (PLUS_EXPR, sizetype,
+ base_offset, byte_offset);
}
/* base + base_offset */
@@ -4277,6 +4281,10 @@ vect_create_addr_base_for_vector_ref (gimple *stmt,
}
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
+ addr_base = force_gimple_operand (addr_base, &seq, true, NULL);
+ gimple_seq_add_seq (new_stmt_list, seq);
+ /* Add constant offset at last. */
+ addr_base = fold_build_pointer_plus (addr_base, init);
dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
addr_base = force_gimple_operand (addr_base, &seq, true, dest);
gimple_seq_add_seq (new_stmt_list, seq);
@@ -4507,12 +4515,13 @@ vect_create_data_ref_ptr (gimple *stmt, tree aggr_type,
struct loop *at_loop,
if (new_stmt_list)
{
if (pe)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
- gcc_assert (!new_bb);
- }
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
+ gcc_assert (!new_bb);
+ bitmap_set_bit (changed_bbs, pe->src->index);
+ }
else
- gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
+ gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
}
*initial_address = new_temp;
@@ -5220,9 +5229,10 @@ vect_setup_realignment (gimple *stmt,
gimple_stmt_iterator *gsi,
NULL_TREE, loop);
if (loop)
{
- pe = loop_preheader_edge (loop);
+ pe = loop_preheader_edge (loop);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
+ bitmap_set_bit (changed_bbs, pe->src->index);
}
else
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 0ff474d..4ad5ba8 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1023,6 +1023,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
if (stmts)
{
gcc_assert (single_succ_p (bb));
+ bitmap_set_bit (changed_bbs, bb->index);
gimple_stmt_iterator gsi = gsi_last_bb (bb);
if (gsi_end_p (gsi))
gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index f928dec..0f59dbc 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
+#include "tree-ssa-dom.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
@@ -83,6 +84,9 @@ source_location vect_location;
/* Vector mapping GIMPLE stmt to stmt_vec_info. */
vec<stmt_vec_info> stmt_vec_info_vec;
+
+/* Basic blocks that should be cleaned up by CSE after vectorization. */
+bitmap changed_bbs;
/* For mapping simduid to vectorization factor. */
@@ -540,6 +544,7 @@ vectorize_loops (void)
note_simd_array_uses (&simd_array_to_simduid_htab);
init_stmt_vec_info_vec ();
+ changed_bbs = BITMAP_ALLOC (NULL);
/* ----------- Analyze loops. ----------- */
@@ -762,6 +767,9 @@ vectorize_loops (void)
loop->aux = NULL;
}
+ if (!bitmap_empty_p (changed_bbs))
+ cse_bbs (changed_bbs);
+ BITMAP_FREE (changed_bbs);
free_stmt_vec_info_vec ();
/* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 12bb904..f16ab79 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -797,6 +797,7 @@ struct dataref_aux {
&& TYPE_UNSIGNED (TYPE)))
extern vec<stmt_vec_info> stmt_vec_info_vec;
+extern bitmap changed_bbs;
void init_stmt_vec_info_vec (void);
void free_stmt_vec_info_vec (void);