Hi Richard,
> -----Original Message-----
> From: Richard Biener [mailto:[email protected]]
> Sent: Friday, October 30, 2015 5:00 PM
> To: Kumar, Venkataramanan
> Cc: Andrew Pinski; [email protected]
> Subject: Re: [RFC] [Patch] Relax tree-if-conv.c trap assumptions.
>
> On Fri, Oct 30, 2015 at 11:21 AM, Kumar, Venkataramanan
> <[email protected]> wrote:
> > Hi Andrew,
> >
> >> -----Original Message-----
> >> From: Andrew Pinski [mailto:[email protected]]
> >> Sent: Friday, October 30, 2015 3:38 PM
> >> To: Kumar, Venkataramanan
> >> Cc: Richard Beiner ([email protected]);
> >> [email protected]
> >> Subject: Re: [RFC] [Patch] Relax tree-if-conv.c trap assumptions.
> >>
> >> On Fri, Oct 30, 2015 at 6:06 PM, Kumar, Venkataramanan
> >> <[email protected]> wrote:
> >> > Hi Richard,
> >> >
> >> > I am trying to "if covert the store" in the below test case and
> >> > later help it to get vectorized under -Ofast
> >> > -ftree-loop-if-convert-stores -fno-common
> >> >
> >> > #define LEN 4096
> >> > __attribute__((aligned(32))) float array[LEN]; void test() { for
> >> > (int i = 0; i <
> >> LEN; i++) {
> >> > if (array[i] > (float)0.)
> >> > array[i] =3 ;
> >> >
> >> > }
> >> > }
> >> >
> >> > Currently in GCC 5.2 does not vectorize it.
> >> > https://goo.gl/9nS6Pd
> >> >
> >> > However ICC seems to vectorize it
> >> > https://goo.gl/y1yGHx
> >> >
> >> > As discussed with you earlier, to allow "if convert store" I am
> >> > checking the
> >> following:
> >> >
> >> > (1) We already read the reference "array[i]" unconditionally once .
> >> > (2) I am now checking if we are conditionally writing to memory
> >> > which is
> >> defined as read and write and is bound to the definition we are seeing.
> >>
> >>
> >> I don't think this is thread safe ....
> >>
> >
> > I thought under -ftree-loop-if-convert-stores it is ok to do this
> transformation.
>
> Yes, that's what we have done in the past here. Note that I think we should
> remove the flag in favor of the --param allow-store-data-races and if-convert
> safe stores always (stores to thread-local memory). Esp. using masked
> stores should be always safe.
>
> > Regards,
> > Venkat.
> >
> >> Thanks,
> >> Andrew
> >>
> >> >
> >> > With this change, I get able to if convert and the vectorize the case
> >> > also.
> >> >
> >> > /build/gcc/xgcc -B ./build/gcc/ ifconv.c -Ofast -fopt-info-vec -S
> >> > -ftree-loop-if-convert-stores -fno-common
> >> > ifconv.c:2:63: note: loop vectorized
> >> >
> >> > Patch
> >> > ------
> >> > diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index
> >> > f201ab5..6475cc0 100644
> >> > --- a/gcc/tree-if-conv.c
> >> > +++ b/gcc/tree-if-conv.c
> >> > @@ -727,6 +727,34 @@ write_memrefs_written_at_least_once
> (gimple
> >> *stmt,
> >> > return true;
> >> > }
> >> >
> >> > +static bool
> >> > +write_memrefs_safe_to_access_unconditionally (gimple *stmt,
> >> > +
> >> > +vec<data_reference_p> drs) {
>
> { to the next line
>
> The function has a bad name it should be write_memrefs_writable ()
>
> >> > + int i;
> >> > + data_reference_p a;
> >> > + bool found = false;
> >> > +
> >> > + for (i = 0; drs.iterate (i, &a); i++)
> >> > + {
> >> > + if (DR_STMT (a) == stmt
> >> > + && DR_IS_WRITE (a)
> >> > + && (DR_WRITTEN_AT_LEAST_ONCE (a) == 0)
> >> > + && (DR_RW_UNCONDITIONALLY (a) == 1))
> >> > + {
> >> > + tree base = get_base_address (DR_REF (a));
> >> > + found = false;
> >> > + if (DECL_P (base)
> >> > + && decl_binds_to_current_def_p (base)
> >> > + && !TREE_READONLY (base))
> >> > + {
> >> > + found = true;
>
> So if the vector ever would contain more than one write you'd return true if
> only one of them is not readonly.
>
> IMHO all the routines need refactoring to operate on single DRs which AFAIK
> is the only case if-conversion handles anyway (can't if-convert calls or
> aggregate assignments or asms). Ugh, it seems the whole thing is quadratic,
> doing linear walks to find the DRs for a stmt ...
>
> A simple
>
> Index: gcc/tree-if-conv.c
> ==========================================================
> =========
> --- gcc/tree-if-conv.c (revision 229572)
> +++ gcc/tree-if-conv.c (working copy)
> @@ -612,9 +612,10 @@ memrefs_read_or_written_unconditionally
> data_reference_p a, b;
> tree ca = bb_predicate (gimple_bb (stmt));
>
> - for (i = 0; drs.iterate (i, &a); i++)
> - if (DR_STMT (a) == stmt)
> - {
> + for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++)
> + {
> + if (DR_STMT (a) != stmt)
> + break;
> bool found = false;
> int x = DR_RW_UNCONDITIONALLY (a);
>
> @@ -684,10 +685,13 @@ write_memrefs_written_at_least_once (gim
> data_reference_p a, b;
> tree ca = bb_predicate (gimple_bb (stmt));
>
> - for (i = 0; drs.iterate (i, &a); i++)
> - if (DR_STMT (a) == stmt
> - && DR_IS_WRITE (a))
> - {
> + for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++)
> + {
> + if (DR_STMT (a) != stmt)
> + break;
> + if (! DR_IS_WRITE (a))
> + continue;
> +
> bool found = false;
> int x = DR_WRITTEN_AT_LEAST_ONCE (a);
>
> @@ -721,7 +725,7 @@ write_memrefs_written_at_least_once (gim
> DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
> return false;
> }
> - }
> + }
>
> return true;
> }
> @@ -1291,6 +1297,7 @@ if_convertible_loop_p_1 (struct loop *lo
> case GIMPLE_CALL:
> case GIMPLE_DEBUG:
> case GIMPLE_COND:
> + gimple_set_uid (gsi_stmt (gsi), 0);
> break;
> default:
> return false;
> @@ -1304,6 +1311,8 @@ if_convertible_loop_p_1 (struct loop *lo
> dr->aux = XNEW (struct ifc_dr);
> DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
> DR_RW_UNCONDITIONALLY (dr) = -1;
> + if (gimple_uid (DR_STMT (dr)) == 0)
> + gimple_set_uid (DR_STMT (dr), i + 1);
> }
> predicate_bbs (loop);
>
>
> would improve that tremendously ... (well, given the array of DRs is sorted
> by stmt which is an implementation detail not documented).
>
> Computing all the DR flags in separate loops also doesn't make much sense to
> me and just complicates code.
>
> Richard.
I have now implemented storing of DR and references using hash maps.
Please find attached patch.
As discussed, I am now storing the ref, DR and baseref, DR pairs along with
unconditional read/write information in hash tables while iterating over DR
during its initialization.
Then during checking for possible traps for if-converting, just check if the
memory reference for a gimple statement is read/written unconditionally by
querying the hash table instead of quadratic walk.
Boot strapped and regression tested on x86_64.
gcc/ChangeLog
2015-11-07 Venkataramanan <[email protected]>
*tree-hash-traits.h (struct tree_operand_hash). Use compare_type and
value_type.
(equal_keys): Rename to equal and use compare_type and value_type.
* tree-if-conv.c (ref_DR_map): Define.
(baseref_DR_map): Like wise
(struct ifc_dr): New tree predicate field.
(hash_memrefs_baserefs_and_store_DRs_read_written_info): New
function.
(memrefs_read_or_written_unconditionally): Use hashmap to query
unconditional read/written information.
(write_memrefs_written_at_least_once) : Remove.
(ifcvt_memrefs_wont_trap): Remove call to
write_memrefs_written_at_least_once.
(if_convertible_loop_p_1): Initialize/delete hash maps an initialize
predicates
before the call to
hash_memrefs_baserefs_and_store_DRs_read_written_info.
gcc/testsuite/ChangeLog
2015-11-07 Venkataramanan <[email protected]>
*gcc.dg/tree-ssa/ifc-8.c: Add new.
Ok for trunk?
Regards,
Venkat.
>
> >> > + }
> >> > + }
> >> > + }
> >> > + return found;
> >> > +}
> >> > +
> >> > /* Return true when the memory references of STMT won't trap in the
> >> > if-converted code. There are two things that we have to check for:
> >> >
> >> > @@ -748,8 +776,20 @@ write_memrefs_written_at_least_once
> (gimple
> >> > *stmt, static bool ifcvt_memrefs_wont_trap (gimple *stmt,
> >> > vec<data_reference_p> refs) {
> >> > - return write_memrefs_written_at_least_once (stmt, refs)
> >> > - && memrefs_read_or_written_unconditionally (stmt, refs);
> >> > + bool memrefs_written_once,
> memrefs_read_written_unconditionally;
> >> > + bool memrefs_safe_to_access;
> >> > +
> >> > + memrefs_written_once
> >> > + = write_memrefs_written_at_least_once (stmt, refs);
> >> > +
> >> > + memrefs_read_written_unconditionally
> >> > + = memrefs_read_or_written_unconditionally (stmt,
> >> > + refs);
> >> > +
> >> > + memrefs_safe_to_access
> >> > + = write_memrefs_safe_to_access_unconditionally (stmt,
> >> > + refs);
> >> > +
> >> > + return ((memrefs_written_once || memrefs_safe_to_access)
> >> > + && memrefs_read_written_unconditionally);
> >> > }
> >> >
> >> > /* Wrapper around gimple_could_trap_p refined for the needs of the
> >> >
> >> >
> >> > do I need this function write_memrefs_written_at_least_once
> anymore?
> >> > Please suggest if there a better way to do this.
> >> >
> >> > Bootstrapped and regression tested on x86_64.
> >> > I am not adding change log and comments now, as I wanted to check
> >> approach first.
> >> >
> >> > Regards,
> >> > Venkat.
> >> >
> >> >
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
new file mode 100644
index 0000000..c155e5b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
@@ -0,0 +1,17 @@
+
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-ifcvt-details -fno-common
-ftree-loop-if-convert-stores" } */
+
+#define LEN 4096
+ __attribute__((aligned(32))) float array[LEN];
+
+void test ()
+{
+ for (int i = 0; i < LEN; i++)
+ {
+ if (array[i] > (float)0.)
+ array[i] =3 ;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
diff --git a/gcc/tree-hash-traits.h b/gcc/tree-hash-traits.h
index 1edc49e..be93106 100644
--- a/gcc/tree-hash-traits.h
+++ b/gcc/tree-hash-traits.h
@@ -19,22 +19,23 @@ along with GCC; see the file COPYING3. If not see
#ifndef tree_hash_traits_h
#define tree_hash_traits_h
-
/* Hash for trees based on operand_equal_p. */
struct tree_operand_hash : ggc_ptr_hash <tree_node>
{
- static inline hashval_t hash (const_tree);
- static inline bool equal_keys (const_tree, const_tree);
+ static inline hashval_t hash (const value_type &);
+ static inline bool equal (const value_type &,
+ const compare_type &);
};
inline hashval_t
-tree_operand_hash::hash (const_tree t)
+tree_operand_hash::hash (const value_type &t)
{
return iterative_hash_expr (t, 0);
}
inline bool
-tree_operand_hash::equal_keys (const_tree t1, const_tree t2)
+tree_operand_hash::equal (const value_type &t1,
+ const compare_type &t2)
{
return operand_equal_p (t1, t2, 0);
}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index f201ab5..8795faa 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -129,6 +129,12 @@ static basic_block *ifc_bbs;
/* Apply more aggressive (extended) if-conversion if true. */
static bool aggressive_if_conv;
+/* Hash table to store references, DR pairs. */
+static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map;
+
+/* Hash table to store base reference, DR pairs. */
+static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
+
/* Structure used to predicate basic blocks. This is attached to the
->aux field of the BBs in the loop to be if-converted. */
struct bb_predicate {
@@ -592,137 +598,153 @@ struct ifc_dr {
/* -1 when not initialized, 0 when false, 1 when true. */
int rw_unconditionally;
+
+ tree ored_result;
+
};
#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
-/* Returns true when the memory references of STMT are read or written
- unconditionally. In other words, this function returns true when
- for every data reference A in STMT there exist other accesses to
- a data reference with the same base with predicates that add up (OR-up) to
- the true predicate: this ensures that the data reference A is touched
- (read or written) on every iteration of the if-converted loop. */
-
-static bool
-memrefs_read_or_written_unconditionally (gimple *stmt,
- vec<data_reference_p> drs)
+/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
+ HASH tables. While storing them in HASH table, it checks if the
+ reference is unconditionally read or written and stores that as a flag
+ information. For base reference it checks if it is written atlest once
+ unconditionally and stores it as flag information along with DR.
+ In other words for every data reference A in STMT there exist other
+ accesses to a data reference with the same base with predicates that
+ add up (OR-up) to the true predicate: this ensures that the data
+ reference A is touched (read or written) on every iteration of the
+ if-converted loop. */
+static void
+hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{
- int i, j;
- data_reference_p a, b;
- tree ca = bb_predicate (gimple_bb (stmt));
- for (i = 0; drs.iterate (i, &a); i++)
- if (DR_STMT (a) == stmt)
- {
- bool found = false;
- int x = DR_RW_UNCONDITIONALLY (a);
-
- if (x == 0)
- return false;
+ data_reference_p *master_dr, *base_master_dr;
+ tree ref = DR_REF (a);
+ tree base_ref = DR_BASE_OBJECT (a);
+ tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
+ bool exsist1, exsist2;
- if (x == 1)
- continue;
+ while (TREE_CODE (ref) == COMPONENT_REF
+ || TREE_CODE (ref) == IMAGPART_EXPR
+ || TREE_CODE (ref) == REALPART_EXPR)
+ ref = TREE_OPERAND (ref, 0);
- for (j = 0; drs.iterate (j, &b); j++)
- {
- tree ref_base_a = DR_REF (a);
- tree ref_base_b = DR_REF (b);
+ master_dr = &ref_DR_map->get_or_insert (ref, &exsist1);
- if (DR_STMT (b) == stmt)
- continue;
+ if (!exsist1)
+ {
+ if (is_true_predicate (ca))
+ {
+ DR_RW_UNCONDITIONALLY (a) = 1;
+ }
- while (TREE_CODE (ref_base_a) == COMPONENT_REF
- || TREE_CODE (ref_base_a) == IMAGPART_EXPR
- || TREE_CODE (ref_base_a) == REALPART_EXPR)
- ref_base_a = TREE_OPERAND (ref_base_a, 0);
+ IFC_DR (a)->ored_result = ca;
+ *master_dr = a;
+ }
+ else
+ {
+ IFC_DR (*master_dr)->ored_result
+ = fold_or_predicates
+ (EXPR_LOCATION (IFC_DR (*master_dr)->ored_result),
+ ca, IFC_DR (*master_dr)->ored_result);
- while (TREE_CODE (ref_base_b) == COMPONENT_REF
- || TREE_CODE (ref_base_b) == IMAGPART_EXPR
- || TREE_CODE (ref_base_b) == REALPART_EXPR)
- ref_base_b = TREE_OPERAND (ref_base_b, 0);
+ if (is_true_predicate (ca)
+ || is_true_predicate (IFC_DR (*master_dr)->ored_result))
+ {
+ DR_RW_UNCONDITIONALLY (*master_dr) = 1;
+ }
+ }
- if (operand_equal_p (ref_base_a, ref_base_b, 0))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
- if (DR_RW_UNCONDITIONALLY (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca
- = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
- {
- DR_RW_UNCONDITIONALLY (a) = 1;
- DR_RW_UNCONDITIONALLY (b) = 1;
- found = true;
- break;
- }
- }
- }
+ base_master_dr = &baseref_DR_map->get_or_insert (base_ref,&exsist2);
- if (!found)
- {
- DR_RW_UNCONDITIONALLY (a) = 0;
- return false;
- }
- }
+ if (!exsist2)
+ {
+ if (DR_IS_WRITE (a) && is_true_predicate (ca))
+ {
+ DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
+ }
+ IFC_DR (a)->ored_result = ca;
+ *base_master_dr = a;
+ }
+ else
+ {
+ IFC_DR (*base_master_dr)->ored_result
+ = fold_or_predicates
+ (EXPR_LOCATION (IFC_DR (*base_master_dr)->ored_result),
+ ca, IFC_DR (*base_master_dr)->ored_result);
- return true;
+ if (DR_IS_WRITE (a)
+ && (is_true_predicate (ca)
+ || (is_true_predicate
+ (IFC_DR (*base_master_dr)->ored_result))))
+ {
+ DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) = 1;
+ }
+ }
}
-/* Returns true when the memory references of STMT are unconditionally
- written. In other words, this function returns true when for every
- data reference A written in STMT, there exist other writes to the
- same data reference with predicates that add up (OR-up) to the true
- predicate: this ensures that the data reference A is written on
- every iteration of the if-converted loop. */
-
+/* Returns true for the memory reference in STMT, same memory reference
+ is read or written unconditionally atleast once and the base memory
+ reference is written unconditionally once. This is to check reference
+ will not write fault. Also retuns true if the memory reference is
+ unconditionally read once then we are conditionally writing to memory
+ which is defined as read and write and is bound to the definition
+ we are seeing. */
static bool
-write_memrefs_written_at_least_once (gimple *stmt,
- vec<data_reference_p> drs)
+memrefs_read_or_written_unconditionally (gimple *stmt,
+ vec<data_reference_p> drs)
{
- int i, j;
- data_reference_p a, b;
- tree ca = bb_predicate (gimple_bb (stmt));
+ int i;
+ data_reference_p a, *master_dr, *base_master_dr;
+ bool found = false;
+ for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++)
+ {
+ if (DR_STMT (a) != stmt)
+ break;
- for (i = 0; drs.iterate (i, &a); i++)
- if (DR_STMT (a) == stmt
- && DR_IS_WRITE (a))
- {
- bool found = false;
- int x = DR_WRITTEN_AT_LEAST_ONCE (a);
+ tree ref_base_a = DR_REF (a);
+ tree base = DR_BASE_OBJECT (a);
- if (x == 0)
- return false;
+ while (TREE_CODE (ref_base_a) == COMPONENT_REF
+ || TREE_CODE (ref_base_a) == IMAGPART_EXPR
+ || TREE_CODE (ref_base_a) == REALPART_EXPR)
+ ref_base_a = TREE_OPERAND (ref_base_a, 0);
- if (x == 1)
- continue;
+ master_dr = ref_DR_map->get (ref_base_a);
+ base_master_dr = baseref_DR_map->get (base);
- for (j = 0; drs.iterate (j, &b); j++)
- if (DR_STMT (b) != stmt
- && DR_IS_WRITE (b)
- && same_data_refs_base_objects (a, b))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+ gcc_assert (master_dr != NULL && base_master_dr != NULL);
- if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION
(cb),
- ca, cb)))
+ if (DR_RW_UNCONDITIONALLY (*master_dr) == 1)
+ {
+ if (DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) == 1)
+ {
+ found = true;
+ break;
+ }
+ else
+ {
+ tree base_tree = get_base_address (DR_REF (a));
+ if (DECL_P (base_tree)
+ && decl_binds_to_current_def_p (base_tree)
+ && !TREE_READONLY (base_tree))
{
- DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
- DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
- found = true;
+ found = true;
break;
}
}
+ }
- if (!found)
- {
- DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
- return false;
- }
- }
+ if (!found)
+ {
+ DR_WRITTEN_AT_LEAST_ONCE (a) =0;
+ DR_RW_UNCONDITIONALLY (a) = 0;
+ return false;
+ }
+ }
return true;
}
@@ -748,8 +770,7 @@ write_memrefs_written_at_least_once (gimple *stmt,
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> refs)
{
- return write_memrefs_written_at_least_once (stmt, refs)
- && memrefs_read_or_written_unconditionally (stmt, refs);
+ return memrefs_read_or_written_unconditionally (stmt, refs);
}
/* Wrapper around gimple_could_trap_p refined for the needs of the
@@ -1292,6 +1313,7 @@ if_convertible_loop_p_1 (struct loop *loop,
case GIMPLE_CALL:
case GIMPLE_DEBUG:
case GIMPLE_COND:
+ gimple_set_uid (gsi_stmt (gsi), 0);
break;
default:
return false;
@@ -1300,13 +1322,20 @@ if_convertible_loop_p_1 (struct loop *loop,
data_reference_p dr;
+ ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
+ baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
+
+ predicate_bbs (loop);
+
for (i = 0; refs->iterate (i, &dr); i++)
{
dr->aux = XNEW (struct ifc_dr);
DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
DR_RW_UNCONDITIONALLY (dr) = -1;
+ if (gimple_uid (DR_STMT (dr)) == 0)
+ gimple_set_uid (DR_STMT (dr), i + 1);
+ hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
}
- predicate_bbs (loop);
for (i = 0; i < loop->num_nodes; i++)
{
@@ -1403,6 +1432,15 @@ if_convertible_loop_p (struct loop *loop, bool
*any_mask_load_store)
free_data_refs (refs);
free_dependence_relations (ddrs);
+
+ if (ref_DR_map)
+ delete ref_DR_map;
+ ref_DR_map = NULL;
+
+ if (baseref_DR_map)
+ delete baseref_DR_map;
+ baseref_DR_map = NULL;
+
return res;
}