Hi all, Previously, the fix for PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was reverted due to the bug reported in PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734>. The root cause was a missing non-trapping check against the dominating LOAD/STORE.
This patch extends the cselim non-trapping check to support ARRAY_REF and COMPONENT_REF (previously only support MEM_REF) by get_inner_reference and hashing according to the comments from Jakub. To support cases in PR89430, if there is a dominating LOAD of local variable without address escape, as local stack is always writable, the STORE is not trapped and can be optimized. Review, please. -------------------- diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..3733780a0bc 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, ??? We currently are very conservative and assume that a load might trap even if a store doesn't (write-only memory). This probably is - overly conservative. */ + overly conservative. -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF - through it was seen, which would constitute a no-trap region for - same accesses. */ -struct name_to_bb + We currently support a special case that for !TREE_ADDRESSABLE automatic + variables, it could ignore whether something is a load or store because the + local stack should be always writable. */ + +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which + basic block an *_REF through it was seen, which would constitute a + no-trap region for same accesses. */ +struct ref_to_bb { - unsigned int ssa_name_ver; + tree base; + poly_int64 bitsize, bitpos; + tree offset; unsigned int phase; - bool store; - HOST_WIDE_INT offset, size; + bool writable; basic_block bb; }; /* Hashtable helpers. 
*/ -struct ssa_names_hasher : free_ptr_hash <name_to_bb> +struct refs_hasher : free_ptr_hash<ref_to_bb> { - static inline hashval_t hash (const name_to_bb *); - static inline bool equal (const name_to_bb *, const name_to_bb *); + static inline hashval_t hash (const ref_to_bb *); + static inline bool equal (const ref_to_bb *, const ref_to_bb *); }; /* Used for quick clearing of the hash-table when we see calls. @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase; /* The hash function. */ inline hashval_t -ssa_names_hasher::hash (const name_to_bb *n) +refs_hasher::hash (const ref_to_bb *n) { - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) - ^ (n->offset << 6) ^ (n->size << 3); + inchash::hash hstate; + inchash::add_expr (n->base, hstate); + hstate.add_poly_int (n->bitsize); + hstate.add_poly_int (n->bitpos); + hstate.add_int (n->writable); + if (n->offset != NULL_TREE) + { + inchash::add_expr (n->offset, hstate); + } + return hstate.end (); } /* The equality function of *P1 and *P2. */ inline bool -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) { - return n1->ssa_name_ver == n2->ssa_name_ver - && n1->store == n2->store - && n1->offset == n2->offset - && n1->size == n2->size; + if (operand_equal_p (n1->base, n2->base, 0) + && known_eq (n1->bitsize, n2->bitsize) + && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable) + { + /* Should not call operand_equal_p with NULL_TREE. 
*/ + if (n1->offset == NULL_TREE || n2->offset == NULL_TREE) + return n1->offset == n2->offset; + else + return operand_equal_p (n1->offset, n2->offset, 0); + } + return false; } class nontrapping_dom_walker : public dom_walker { public: nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256) + {} virtual edge before_dom_children (basic_block); virtual void after_dom_children (basic_block); @@ -2053,7 +2074,7 @@ private: hash_set<tree> *m_nontrapping; /* The hash table for remembering what we've seen. */ - hash_table<ssa_names_hasher> m_seen_ssa_names; + hash_table<refs_hasher> m_seen_refs; }; /* Called by walk_dominator_tree, when entering the block BB. */ @@ -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) } /* We see the expression EXP in basic block BB. If it's an interesting - expression (an MEM_REF through an SSA_NAME) possibly insert the - expression into the set NONTRAP or the hash table of seen expressions. - STORE is true if this expression is on the LHS, otherwise it's on - the RHS. */ + expression of: + 1) MEM_REF + 2) ARRAY_REF + 3) COMPONENT_REF + possibly insert the expression into the set NONTRAP or the hash table + of seen expressions. STORE is true if this expression is on the LHS, + otherwise it's on the RHS. 
*/ void nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) { - HOST_WIDE_INT size; - - if (TREE_CODE (exp) == MEM_REF - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) - && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) + if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF + || TREE_CODE (exp) == COMPONENT_REF) { - tree name = TREE_OPERAND (exp, 0); - struct name_to_bb map; - name_to_bb **slot; - struct name_to_bb *n2bb; + struct ref_to_bb map; + ref_to_bb **slot; + struct ref_to_bb *r2bb; basic_block found_bb = 0; - /* Try to find the last seen MEM_REF through the same - SSA_NAME, which can trap. */ - map.ssa_name_ver = SSA_NAME_VERSION (name); - map.phase = 0; - map.bb = 0; - map.store = store; - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); - map.size = size; - - slot = m_seen_ssa_names.find_slot (&map, INSERT); - n2bb = *slot; - if (n2bb && n2bb->phase >= nt_call_phase) - found_bb = n2bb->bb; - - /* If we've found a trapping MEM_REF, _and_ it dominates EXP - (it's in a basic block on the path from us to the dominator root) + /* Try to find the last seen *_REF (through the same base expression + + bitsize + bitpos + offset expression), which can trap. */ + machine_mode nmode; + poly_int64 bitsize, bitpos; + tree offset; + int nunsignedp, nreversep, nvolatilep = 0; + tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, &nmode, + &nunsignedp, &nreversep, &nvolatilep); + gcc_assert (base != NULL_TREE); + map.base = base; + map.offset = offset; + map.bitsize = bitsize; + map.bitpos = bitpos; + + /* The reference is writable if EXP is a: + 1) STORE, or + 2) LOAD of a local variable without address-taken (as the local stack + is always writable). This allows cselim on a STORE with a dominating + LOAD. 
*/ + map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE (base)); + + slot = m_seen_refs.find_slot (&map, INSERT); + r2bb = *slot; + if (r2bb && r2bb->phase >= nt_call_phase) + found_bb = r2bb->bb; + + /* If we've found a trapping *_REF, _and_ it dominates EXP + (it's in a basic block on the path from us to the dominator root) then we can't trap. */ - if (found_bb && (((size_t)found_bb->aux) & 1) == 1) + if (found_bb && (((size_t) found_bb->aux) & 1) == 1) { m_nontrapping->add (exp); } else - { + { /* EXP might trap, so insert it into the hash table. */ - if (n2bb) + if (r2bb) { - n2bb->phase = nt_call_phase; - n2bb->bb = bb; + r2bb->phase = nt_call_phase; + r2bb->bb = bb; } else { - n2bb = XNEW (struct name_to_bb); - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); - n2bb->phase = nt_call_phase; - n2bb->bb = bb; - n2bb->store = store; - n2bb->offset = map.offset; - n2bb->size = size; - *slot = n2bb; + r2bb = XNEW (struct ref_to_bb); + r2bb->phase = nt_call_phase; + r2bb->bb = bb; + r2bb->base = base; + r2bb->offset = map.offset; + r2bb->bitsize = map.bitsize; + r2bb->bitpos = map.bitpos; + r2bb->writable = map.writable; + *slot = r2bb; } } } Thanks, -Hao
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a96dedc8838..eae03a10948 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2020-05-27 Hao Liu <h...@os.amperecomputing.com> + + PR tree-optimization/89430 + * tree-ssa-phiopt.c (add_or_mark_expr): Extend to support ARRAY_REF and + COMPONENT_REF by get_inner_reference, so we can check if there is a + dominating unconditional load/store. A special case is also supported: + if there is a dominating load of a local variable without address escape, + a store to the same location cannot trap (as the local stack is always + writable). + 2020-05-25 Eric Botcazou <ebotca...@adacore.com> * gimple-ssa-store-merging.c (merged_store_group::can_be_merged_into): diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index aec3a219953..5ffa8127f43 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2020-05-27 Hao Liu <h...@os.amperecomputing.com> + + PR tree-optimization/89430 + * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail. + * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail. + * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail. + * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail. + * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test. + * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim. + 2020-05-25 Eric Botcazou <ebotca...@adacore.com> * gnat.dg/opt84.adb: New test. 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c index ce242ba569b..8ee1850ac63 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c index 90ae36bfce2..9b96875ac7a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c index c633cbe947d..b2d04119381 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c @@ -13,4 +13,4 @@ int test(int b, int k) { return a.data[0] + a.data[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c index 7cad563128d..8d3c4f7cc6a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c @@ -16,4 +16,4 @@ int test(int b, int k) { return a.data[0].x + a.data[1].x; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git 
a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c new file mode 100644 index 00000000000..c35a2afc70b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ + +typedef union { + int i; + float f; +} U; + +int foo(U *u, int b, int i) +{ + u->i = 0; + if (b) + u->i = i; + return u->i; +} + +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c index 09313716598..a06f339f0bb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-pre-stats" } */ +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */ typedef union { int i; diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..3733780a0bc 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, ??? We currently are very conservative and assume that a load might trap even if a store doesn't (write-only memory). This probably is - overly conservative. */ + overly conservative. -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF - through it was seen, which would constitute a no-trap region for - same accesses. */ -struct name_to_bb + We currently support a special case that for !TREE_ADDRESSABLE automatic + variables, it could ignore whether something is a load or store because the + local stack should be always writable. */ + +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which + basic block an *_REF through it was seen, which would constitute a + no-trap region for same accesses. 
*/ +struct ref_to_bb { - unsigned int ssa_name_ver; + tree base; + poly_int64 bitsize, bitpos; + tree offset; unsigned int phase; - bool store; - HOST_WIDE_INT offset, size; + bool writable; basic_block bb; }; /* Hashtable helpers. */ -struct ssa_names_hasher : free_ptr_hash <name_to_bb> +struct refs_hasher : free_ptr_hash<ref_to_bb> { - static inline hashval_t hash (const name_to_bb *); - static inline bool equal (const name_to_bb *, const name_to_bb *); + static inline hashval_t hash (const ref_to_bb *); + static inline bool equal (const ref_to_bb *, const ref_to_bb *); }; /* Used for quick clearing of the hash-table when we see calls. @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase; /* The hash function. */ inline hashval_t -ssa_names_hasher::hash (const name_to_bb *n) +refs_hasher::hash (const ref_to_bb *n) { - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) - ^ (n->offset << 6) ^ (n->size << 3); + inchash::hash hstate; + inchash::add_expr (n->base, hstate); + hstate.add_poly_int (n->bitsize); + hstate.add_poly_int (n->bitpos); + hstate.add_int (n->writable); + if (n->offset != NULL_TREE) + { + inchash::add_expr (n->offset, hstate); + } + return hstate.end (); } /* The equality function of *P1 and *P2. */ inline bool -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) { - return n1->ssa_name_ver == n2->ssa_name_ver - && n1->store == n2->store - && n1->offset == n2->offset - && n1->size == n2->size; + if (operand_equal_p (n1->base, n2->base, 0) + && known_eq (n1->bitsize, n2->bitsize) + && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable) + { + /* Should not call operand_equal_p with NULL_TREE. 
*/ + if (n1->offset == NULL_TREE || n2->offset == NULL_TREE) + return n1->offset == n2->offset; + else + return operand_equal_p (n1->offset, n2->offset, 0); + } + return false; } class nontrapping_dom_walker : public dom_walker { public: nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256) + {} virtual edge before_dom_children (basic_block); virtual void after_dom_children (basic_block); @@ -2053,7 +2074,7 @@ private: hash_set<tree> *m_nontrapping; /* The hash table for remembering what we've seen. */ - hash_table<ssa_names_hasher> m_seen_ssa_names; + hash_table<refs_hasher> m_seen_refs; }; /* Called by walk_dominator_tree, when entering the block BB. */ @@ -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) } /* We see the expression EXP in basic block BB. If it's an interesting - expression (an MEM_REF through an SSA_NAME) possibly insert the - expression into the set NONTRAP or the hash table of seen expressions. - STORE is true if this expression is on the LHS, otherwise it's on - the RHS. */ + expression of: + 1) MEM_REF + 2) ARRAY_REF + 3) COMPONENT_REF + possibly insert the expression into the set NONTRAP or the hash table + of seen expressions. STORE is true if this expression is on the LHS, + otherwise it's on the RHS. 
*/ void nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) { - HOST_WIDE_INT size; - - if (TREE_CODE (exp) == MEM_REF - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) - && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) + if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF + || TREE_CODE (exp) == COMPONENT_REF) { - tree name = TREE_OPERAND (exp, 0); - struct name_to_bb map; - name_to_bb **slot; - struct name_to_bb *n2bb; + struct ref_to_bb map; + ref_to_bb **slot; + struct ref_to_bb *r2bb; basic_block found_bb = 0; - /* Try to find the last seen MEM_REF through the same - SSA_NAME, which can trap. */ - map.ssa_name_ver = SSA_NAME_VERSION (name); - map.phase = 0; - map.bb = 0; - map.store = store; - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); - map.size = size; - - slot = m_seen_ssa_names.find_slot (&map, INSERT); - n2bb = *slot; - if (n2bb && n2bb->phase >= nt_call_phase) - found_bb = n2bb->bb; - - /* If we've found a trapping MEM_REF, _and_ it dominates EXP - (it's in a basic block on the path from us to the dominator root) + /* Try to find the last seen *_REF (through the same base expression + + bitsize + bitpos + offset expression), which can trap. */ + machine_mode nmode; + poly_int64 bitsize, bitpos; + tree offset; + int nunsignedp, nreversep, nvolatilep = 0; + tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, &nmode, + &nunsignedp, &nreversep, &nvolatilep); + gcc_assert (base != NULL_TREE); + map.base = base; + map.offset = offset; + map.bitsize = bitsize; + map.bitpos = bitpos; + + /* The reference is writable if EXP is a: + 1) STORE, or + 2) LOAD of a local variable without address-taken (as the local stack + is always writable). This allows cselim on a STORE with a dominating + LOAD. 
*/ + map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE (base)); + + slot = m_seen_refs.find_slot (&map, INSERT); + r2bb = *slot; + if (r2bb && r2bb->phase >= nt_call_phase) + found_bb = r2bb->bb; + + /* If we've found a trapping *_REF, _and_ it dominates EXP + (it's in a basic block on the path from us to the dominator root) then we can't trap. */ - if (found_bb && (((size_t)found_bb->aux) & 1) == 1) + if (found_bb && (((size_t) found_bb->aux) & 1) == 1) { m_nontrapping->add (exp); } else - { + { /* EXP might trap, so insert it into the hash table. */ - if (n2bb) + if (r2bb) { - n2bb->phase = nt_call_phase; - n2bb->bb = bb; + r2bb->phase = nt_call_phase; + r2bb->bb = bb; } else { - n2bb = XNEW (struct name_to_bb); - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); - n2bb->phase = nt_call_phase; - n2bb->bb = bb; - n2bb->store = store; - n2bb->offset = map.offset; - n2bb->size = size; - *slot = n2bb; + r2bb = XNEW (struct ref_to_bb); + r2bb->phase = nt_call_phase; + r2bb->bb = bb; + r2bb->base = base; + r2bb->offset = map.offset; + r2bb->bitsize = map.bitsize; + r2bb->bitpos = map.bitpos; + r2bb->writable = map.writable; + *slot = r2bb; } } }