On Thu, 5 Nov 2020, Jason Merrill wrote: > On 11/4/20 2:19 PM, Patrick Palka wrote: > > [ This patch depends on > > > > c++: Reuse identical ATOMIC_CONSTRs during normalization > > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557929.html ] > > > > This improves the effectiveness of caching in satisfy_atom by querying > > the cache again after we've instantiated the atom's parameter mapping. > > > > Before instantiating its mapping, the identity of an (atom,args) pair > > within the satisfaction cache is determined by idiosyncratic things such > > as the level and index of each template parameter used in targets of the > > parameter mapping. For example, the associated constraints of foo in > > > > template <class T> concept range = range_v<T>; > > template <class U, class V> void foo () requires range<U> && range<V>; > > > > are range_v<T> (with mapping T -> U) /\ range_v<T> (with mapping T -> V). > > If during satisfaction the template arguments supplied for U and V are > > the same, then the satisfaction value of these two atoms will be the > > same (despite their uninstantiated parameter mappings being different). > > > > But sat_cache doesn't see this because it compares the uninstantiated > > parameter mapping and the supplied template arguments of sat_entry's > > independently. So satisy_atom currently will end up fully evaluating > > the latter atom instead of reusing the satisfaction value of the former. > > > > But there is a point when the two atoms do look the same to sat_cache, > > and that's after instantiating their parameter mappings. By querying > > the cache again at this point, we're at least able to avoid substituting > > the instantiated mapping into the second atom's expression. > > > > With this patch, compile time and memory usage for the cmcstl2 test > > test/algorithm/set_symmetric_diference4.cpp drops from 11s/1.4GB to > > 8.5s/1.2GB with an --enable-checking=release compiler. > > How does the performance compare if we *only* cache after substituting into > the parameter mapping? I'd expect that substitution to be pretty cheap in > general.
tsubst_parameter_mapping is surprisingly expensive. If we only cache after substituting into the mapping, then for e.g. the libstdc++ test std/ranges/adaptor/join.cc the performance stats are 5s/600MB vs 2s/225MB (with the three patches). Profiling shows that tsubst_parameter_mapping accounts for ~50% of compile time (and apparently a ton of garbage generation) and tsubst_expr only for ~10% of compile time in this scheme. Compiling the cmcstl2 test mentioned above required 8+GB memory before I killed the procress. Also, only caching after substituting into the mapping means there's no way to cache negative satisfaction results that arise from failed substitution into the parameter mapping, IIUC. > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for > > trunk? > > > > gcc/cp/ChangeLog: > > > > * cp-tree.h (ATOMIC_CONSTR_MAP_INSTANTIATED_P): Define this flag > > for ATOMIC_CONSTRs. > > * constraint.cc (sat_hasher::hash): Use hash_atomic_constraint > > if the flag is set, otherwise keep using a pointer hash. > > (sat_hasher::equal): Return false if the flag's setting differs > > on two atoms. Call atomic_constraints_identical_p if the flag > > is set, otherwise keep using a pointer equality test. > > (satisfy_atom): After instantiating the parameter mapping, form > > another ATOMIC_CONSTR using the instantiated mapping and query > > the cache again. Cache the satisfaction value of both atoms. > > (diagnose_atomic_constraint): Simplify now that the supplied > > atom has an instantiated mapping. > > --- > > gcc/cp/constraint.cc | 47 +++++++++++++++++++++++++++++++++++--------- > > gcc/cp/cp-tree.h | 6 ++++++ > > 2 files changed, 44 insertions(+), 9 deletions(-) > > > > diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc > > index 55dba362ca5..c612bfba13b 100644 > > --- a/gcc/cp/constraint.cc > > +++ b/gcc/cp/constraint.cc > > @@ -2315,12 +2315,32 @@ struct sat_hasher : ggc_ptr_hash<sat_entry> > > { > > static hashval_t hash (sat_entry *e) > > { > > + if (ATOMIC_CONSTR_MAP_INSTANTIATED_P (e->constr)) > > + { > > + gcc_assert (!e->args); > > + return hash_atomic_constraint (e->constr); > > + } > > + > > hashval_t value = htab_hash_pointer (e->constr); > > return iterative_hash_template_arg (e->args, value); > > } > > static bool equal (sat_entry *e1, sat_entry *e2) > > { > > + if (ATOMIC_CONSTR_MAP_INSTANTIATED_P (e1->constr) > > + != ATOMIC_CONSTR_MAP_INSTANTIATED_P (e2->constr)) > > + return false; > > + > > + if (ATOMIC_CONSTR_MAP_INSTANTIATED_P (e1->constr)) > > + { > > + /* Atoms with instantiated mappings are built in satisfy_atom. */ > > + gcc_assert (!e1->args && !e2->args); > > + return atomic_constraints_identical_p (e1->constr, e2->constr); > > + } > > + > > + /* Atoms with uninstantiated mappings are built in normalize_atom. > > + Their identity is determined by their pointer value due to > > + the caching of ATOMIC_CONSTRs performed therein. */ > > if (e1->constr != e2->constr) > > return false; > > return template_args_equal (e1->args, e2->args); > > @@ -2614,6 +2634,18 @@ satisfy_atom (tree t, tree args, subst_info info) > > return cache.save (boolean_false_node); > > } > > + /* Now build a new atom using the instantiated mapping. We use > > + this atom as a second key to the satisfaction cache, and we > > + also pass it to diagnose_atomic_constraint so that diagnostics > > + which refer to the atom display the instantiated mapping. */ > > + t = copy_node (t); > > + ATOMIC_CONSTR_MAP (t) = map; > > + gcc_assert (!ATOMIC_CONSTR_MAP_INSTANTIATED_P (t)); > > + ATOMIC_CONSTR_MAP_INSTANTIATED_P (t) = true; > > + satisfaction_cache inst_cache (t, /*args=*/NULL_TREE, info.complain); > > + if (tree r = inst_cache.get ()) > > + return cache.save (r); > > + > > /* Rebuild the argument vector from the parameter mapping. */ > > args = get_mapped_args (map); > > @@ -2626,19 +2658,19 @@ satisfy_atom (tree t, tree args, subst_info info) > > is not satisfied. Replay the substitution. */ > > if (info.noisy ()) > > tsubst_expr (expr, args, info.complain, info.in_decl, false); > > - return cache.save (boolean_false_node); > > + return cache.save (inst_cache.save (boolean_false_node)); > > } > > /* [17.4.1.2] ... lvalue-to-rvalue conversion is performed as > > necessary, > > and EXPR shall be a constant expression of type bool. */ > > result = force_rvalue (result, info.complain); > > if (result == error_mark_node) > > - return cache.save (error_mark_node); > > + return cache.save (inst_cache.save (error_mark_node)); > > if (!same_type_p (TREE_TYPE (result), boolean_type_node)) > > { > > if (info.noisy ()) > > diagnose_atomic_constraint (t, map, result, info); > > - return cache.save (error_mark_node); > > + return cache.save (inst_cache.save (error_mark_node)); > > } > > /* Compute the value of the constraint. */ > > @@ -2655,7 +2687,7 @@ satisfy_atom (tree t, tree args, subst_info info) > > if (result == boolean_false_node && info.noisy ()) > > diagnose_atomic_constraint (t, map, result, info); > > - return cache.save (result); > > + return cache.save (inst_cache.save (result)); > > } > > /* Determine if the normalized constraint T is satisfied. > > @@ -3495,14 +3527,11 @@ diagnose_atomic_constraint (tree t, tree map, tree > > result, subst_info info) > > diagnose_requires_expr (expr, map, info.in_decl); > > break; > > default: > > - tree a = copy_node (t); > > - ATOMIC_CONSTR_MAP (a) = map; > > if (!same_type_p (TREE_TYPE (result), boolean_type_node)) > > error_at (loc, "constraint %qE has type %qT, not %<bool%>", > > - a, TREE_TYPE (result)); > > + t, TREE_TYPE (result)); > > else > > - inform (loc, "the expression %qE evaluated to %<false%>", a); > > - ggc_free (a); > > + inform (loc, "the expression %qE evaluated to %<false%>", t); > > } > > } > > diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h > > index 26852f6f2e3..83975a93769 100644 > > --- a/gcc/cp/cp-tree.h > > +++ b/gcc/cp/cp-tree.h > > @@ -435,6 +435,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; > > REINTERPRET_CAST_P (in NOP_EXPR) > > ALIGNOF_EXPR_STD_P (in ALIGNOF_EXPR) > > OVL_DEDUP_P (in OVERLOAD) > > + ATOMIC_CONSTR_MAP_INSTANTIATED_P (in ATOMIC_CONSTR) > > 1: IDENTIFIER_KIND_BIT_1 (in IDENTIFIER_NODE) > > TI_PENDING_TEMPLATE_FLAG. > > TEMPLATE_PARMS_FOR_INLINE. > > @@ -1593,6 +1594,11 @@ check_constraint_info (tree t) > > #define ATOMIC_CONSTR_MAP(NODE) \ > > TREE_OPERAND (TREE_CHECK (NODE, ATOMIC_CONSTR), 0) > > +/* Whether the parameter mapping of this atomic constraint > > + is already instantiated with concrete template arguments. */ > > +#define ATOMIC_CONSTR_MAP_INSTANTIATED_P(NODE) \ > > + TREE_LANG_FLAG_0 (ATOMIC_CONSTR_CHECK (NODE)) > > + > > /* The expression of an atomic constraint. */ > > #define ATOMIC_CONSTR_EXPR(NODE) \ > > CONSTR_EXPR (ATOMIC_CONSTR_CHECK (NODE)) > > > >