Boszormenyi Zoltan írta:
> Boszormenyi Zoltan írta:
>
>> Heikki Linnakangas írta:
>>
>>
>>> On 26.10.2010 18:34, Boszormenyi Zoltan wrote:
>>>
>>>
>>>> thank you very much for pointing me to dynahash, here is the
>>>> next version that finally seems to work.
>>>>
>>>> Two patches are attached, the first is the absolute minimum for
>>>> making it work, this still has the Tree type for canon_pathkeys
>>>> and eq_classes got the same treatment as join_rel_list/join_rel_hash
>>>> has in the current sources: if the list grows larger than 32, a hash
>>>> table
>>>> is created. It seems to be be enough for doing in for
>>>> get_eclass_for_sort_expr()
>>>> only, the other users of eq_classes aren't bothered by this change.
>>>>
>>>>
>>> That's better, but can't you use dynahash for canon_pathkeys as well?
>>>
>>>
>> Here's a purely dynahash solution. It's somewhat slower than
>> the tree version, 0.45 vs 0.41 seconds in the cached case for the
>> previously posted test case.
>>
>>
>
> And now in context diff, sorry for my affection towards unified diffs. :-)
>
A little better version, no need for the heavy hash_any, hash_uint32
on the lower 32 bits on pk_eclass is enough. The profiling runtime
is now 0.42 seconds vs the previous 0.41 seconds for the tree version.
Best regards,
Zoltán Böszörményi
--
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
diff -dcrpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c
*** postgresql.orig/src/backend/optimizer/path/equivclass.c 2010-10-15 10:31:47.000000000 +0200
--- postgresql.1/src/backend/optimizer/path/equivclass.c 2010-10-28 12:37:47.000000000 +0200
***************
*** 24,29 ****
--- 24,30 ----
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
+ #include "utils/hsearch.h"
#include "utils/lsyscache.h"
*************** add_eq_member(EquivalenceClass *ec, Expr
*** 360,434 ****
/*
! * get_eclass_for_sort_expr
! * Given an expression and opfamily info, find an existing equivalence
! * class it is a member of; if none, build a new single-member
! * EquivalenceClass for it.
! *
! * sortref is the SortGroupRef of the originating SortGroupClause, if any,
! * or zero if not. (It should never be zero if the expression is volatile!)
! *
! * This can be used safely both before and after EquivalenceClass merging;
! * since it never causes merging it does not invalidate any existing ECs
! * or PathKeys.
! *
! * Note: opfamilies must be chosen consistently with the way
! * process_equivalence() would do; that is, generated from a mergejoinable
! * equality operator. Else we might fail to detect valid equivalences,
! * generating poor (but not incorrect) plans.
*/
! EquivalenceClass *
! get_eclass_for_sort_expr(PlannerInfo *root,
! Expr *expr,
! Oid expr_datatype,
! List *opfamilies,
! Index sortref)
{
! EquivalenceClass *newec;
! EquivalenceMember *newem;
ListCell *lc1;
! MemoryContext oldcontext;
/*
! * Scan through the existing EquivalenceClasses for a match
*/
! foreach(lc1, root->eq_classes)
{
! EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
! ListCell *lc2;
/*
! * Never match to a volatile EC, except when we are looking at another
! * reference to the same volatile SortGroupClause.
*/
! if (cur_ec->ec_has_volatile &&
! (sortref == 0 || sortref != cur_ec->ec_sortref))
! continue;
!
! if (!equal(opfamilies, cur_ec->ec_opfamilies))
continue;
! foreach(lc2, cur_ec->ec_members)
{
! EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
!
! /*
! * If below an outer join, don't match constants: they're not as
! * constant as they look.
! */
! if (cur_ec->ec_below_outer_join &&
! cur_em->em_is_const)
! continue;
! if (expr_datatype == cur_em->em_datatype &&
! equal(expr, cur_em->em_expr))
! return cur_ec; /* Match! */
}
}
/*
- * No match, so build a new single-member EC
- *
* Here, we must be sure that we construct the EC in the right context. We
* can assume, however, that the passed expr is long-lived.
*/
--- 361,463 ----
/*
! * eq_classes_match - matching function for eq_classes_hash in PlannerInfo
*/
! static int
! eq_classes_match(const void *key1, const void *key2, Size keysize)
{
! EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */
! EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */
ListCell *lc1;
! ListCell *lc2;
/*
! * Never match to a volatile EC, except when we are looking at another
! * reference to the same volatile SortGroupClause.
*/
! if (ec1->ec_has_volatile &&
! (ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref))
! return 1;
!
! if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies))
! return 1;
!
! foreach(lc1, ec1->ec_members)
{
! EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1);
/*
! * If below an outer join, don't match constants: they're not as
! * constant as they look.
*/
! if (ec1->ec_below_outer_join &&
! em1->em_is_const)
continue;
! foreach(lc2, ec2->ec_members)
{
! EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2);
! if (em1->em_datatype == em2->em_datatype &&
! equal(em1->em_expr, em2->em_expr))
! return 0;
}
}
+ return 1;
+ }
+
+
+ /*
+ * build_eq_classes_hash
+ * Build the initial contents of PlannerInfo.eq_classes_hash
+ * for faster search in PlannerInfo.eq_classes. This is used
+ * to make get_eclass_for_sort_expr() faster for large
+ * inheritance trees.
+ */
+ static void
+ build_eq_classes_hash(PlannerInfo *root)
+ {
+ MemoryContext oldcontext;
+ HASHCTL info;
+
+ ListCell *lc;
+
+ info.match = eq_classes_match;
+ info.hcxt = root->planner_cxt;
+ info.keysize = sizeof(Relids);
+ info.entrysize = sizeof(EquivalenceClass);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->eq_classes_hash = hash_create("eq_classes", 2048, &info,
+ HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+ foreach(lc, root->eq_classes)
+ {
+ EquivalenceClass *ec = lfirst(lc);
+ bool found;
+
+ hash_search_with_hash_value(root->eq_classes_hash, ec,
+ bms_hash_value(ec->ec_relids),
+ HASH_ENTER, &found);
+ Assert(!found);
+ }
+ }
+
+
+ static EquivalenceClass *
+ build_new_ec(PlannerInfo *root,
+ Expr *expr,
+ Oid expr_datatype,
+ List *opfamilies,
+ Index sortref)
+ {
+ MemoryContext oldcontext;
+ EquivalenceClass *newec;
+ EquivalenceMember *newem;
+
/*
* Here, we must be sure that we construct the EC in the right context. We
* can assume, however, that the passed expr is long-lived.
*/
*************** get_eclass_for_sort_expr(PlannerInfo *ro
*** 471,483 ****
}
}
- root->eq_classes = lappend(root->eq_classes, newec);
-
MemoryContextSwitchTo(oldcontext);
return newec;
}
/*
* generate_base_implied_equalities
--- 500,631 ----
}
}
MemoryContextSwitchTo(oldcontext);
return newec;
}
+ /*
+ * get_eclass_for_sort_expr
+ * Given an expression and opfamily info, find an existing equivalence
+ * class it is a member of; if none, build a new single-member
+ * EquivalenceClass for it.
+ *
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
+ * or zero if not. (It should never be zero if the expression is volatile!)
+ *
+ * This can be used safely both before and after EquivalenceClass merging;
+ * since it never causes merging it does not invalidate any existing ECs
+ * or PathKeys.
+ *
+ * Note: opfamilies must be chosen consistently with the way
+ * process_equivalence() would do; that is, generated from a mergejoinable
+ * equality operator. Else we might fail to detect valid equivalences,
+ * generating poor (but not incorrect) plans.
+ */
+ EquivalenceClass *
+ get_eclass_for_sort_expr(PlannerInfo *root,
+ Expr *expr,
+ Oid expr_datatype,
+ List *opfamilies,
+ Index sortref)
+ {
+ EquivalenceClass *newec;
+ ListCell *lc1;
+ MemoryContext oldcontext;
+
+ if (root->eq_classes_hash == NULL &&
+ list_length(root->eq_classes) > 32)
+ build_eq_classes_hash(root);
+
+ if (root->eq_classes_hash == NULL)
+ {
+ /*
+ * Scan through the existing EquivalenceClasses for a match
+ */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /*
+ * Never match to a volatile EC, except when we are looking at another
+ * reference to the same volatile SortGroupClause.
+ */
+ if (cur_ec->ec_has_volatile &&
+ (sortref == 0 || sortref != cur_ec->ec_sortref))
+ continue;
+
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /*
+ * If below an outer join, don't match constants: they're not as
+ * constant as they look.
+ */
+ if (cur_ec->ec_below_outer_join &&
+ cur_em->em_is_const)
+ continue;
+
+ if (expr_datatype == cur_em->em_datatype &&
+ equal(expr, cur_em->em_expr))
+ return cur_ec; /* Match! */
+ }
+ }
+
+ /*
+ * No match, so build a new single-member EC
+ */
+ newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ root->eq_classes = lappend(root->eq_classes, newec);
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+ }
+ else
+ {
+ EquivalenceClass *ec_found;
+ bool found;
+ uint32 hashval;
+
+ /*
+ * Build the new single-member EC to match against in hash_search()
+ */
+ newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+ hashval = bms_hash_value(newec->ec_relids);
+
+ ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found);
+
+ if (found)
+ {
+ list_free(newec->ec_opfamilies);
+ list_free_deep(newec->ec_members);
+ bms_free(newec->ec_relids);
+ pfree(newec);
+ return ec_found;
+ }
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->eq_classes = lappend(root->eq_classes, newec);
+
+ hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found);
+
+ Assert(!found);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+ }
+ }
+
/*
* generate_base_implied_equalities
diff -dcrpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c
*** postgresql.orig/src/backend/optimizer/path/pathkeys.c 2010-09-21 13:49:57.000000000 +0200
--- postgresql.1/src/backend/optimizer/path/pathkeys.c 2010-10-28 12:39:22.000000000 +0200
***************
*** 17,22 ****
--- 17,23 ----
*/
#include "postgres.h"
+ #include "access/hash.h"
#include "access/skey.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
***************
*** 27,32 ****
--- 28,34 ----
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
+ #include "utils/hsearch.h"
#include "utils/lsyscache.h"
*************** makePathKey(EquivalenceClass *eclass, Oi
*** 72,77 ****
--- 74,144 ----
}
/*
+ * pk_hash
+ * hashtable hash function for PlannerInfo.canon_pathkeys_hash
+ */
+ static uint32
+ pk_hash(const void *key, Size keysize)
+ {
+ PathKey *pk = (PathKey *) key;
+ intptr_t ptr = (intptr_t) pk->pk_eclass;
+
+ return DatumGetUInt32(hash_uint32((uint32)ptr));
+ }
+
+ /*
+ * pk_match
+ * hashtable match function for PlannerInfo.canon_pathkeys_hash
+ */
+ static int
+ pk_match(const void *key1, const void *key2, Size keysize)
+ {
+ PathKey *pk1 = (PathKey *)key1;
+ PathKey *pk2 = (PathKey *)key2;
+
+ if (pk1->pk_eclass == pk2->pk_eclass &&
+ pk1->pk_opfamily == pk2->pk_opfamily &&
+ pk1->pk_strategy == pk2->pk_strategy &&
+ pk1->pk_nulls_first == pk2->pk_nulls_first)
+ return 0;
+ return 1;
+ }
+
+ /*
+ * build_canonical_pathkey_hash
+ *
+ * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list.
+ */
+ static void
+ build_canonical_pathkey_hash(PlannerInfo *root)
+ {
+ MemoryContext oldcontext;
+ HASHCTL info;
+
+ ListCell *lc;
+
+ info.hash = pk_hash;
+ info.match = pk_match;
+ info.hcxt = root->planner_cxt;
+ info.keysize = sizeof(EquivalenceClass *);
+ info.entrysize = sizeof(PathKey);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info,
+ HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+ foreach(lc, root->canon_pathkeys)
+ {
+ PathKey *pk = lfirst(lc);
+ bool found;
+
+ hash_search(root->canon_pathkeys_hash, pk, HASH_ENTER, &found);
+ Assert(!found);
+ }
+ }
+
+ /*
* make_canonical_pathkey
* Given the parameters for a PathKey, find any pre-existing matching
* pathkey in the query's list of "canonical" pathkeys. Make a new
*************** make_canonical_pathkey(PlannerInfo *root
*** 85,91 ****
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first)
{
! PathKey *pk;
ListCell *lc;
MemoryContext oldcontext;
--- 152,158 ----
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first)
{
! PathKey *pk, *pk_found;
ListCell *lc;
MemoryContext oldcontext;
*************** make_canonical_pathkey(PlannerInfo *root
*** 93,118 ****
while (eclass->ec_merged)
eclass = eclass->ec_merged;
! foreach(lc, root->canon_pathkeys)
{
! pk = (PathKey *) lfirst(lc);
! if (eclass == pk->pk_eclass &&
! opfamily == pk->pk_opfamily &&
! strategy == pk->pk_strategy &&
! nulls_first == pk->pk_nulls_first)
! return pk;
}
! /*
! * Be sure canonical pathkeys are allocated in the main planning context.
! * Not an issue in normal planning, but it is for GEQO.
! */
! oldcontext = MemoryContextSwitchTo(root->planner_cxt);
! pk = makePathKey(eclass, opfamily, strategy, nulls_first);
! root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
! MemoryContextSwitchTo(oldcontext);
return pk;
}
--- 160,222 ----
while (eclass->ec_merged)
eclass = eclass->ec_merged;
! if (root->canon_pathkeys_hash == NULL &&
! list_length(root->canon_pathkeys) > 32)
! build_canonical_pathkey_hash(root);
!
! if (root->canon_pathkeys_hash == NULL)
{
! foreach(lc, root->canon_pathkeys)
! {
! pk = (PathKey *) lfirst(lc);
! if (eclass == pk->pk_eclass &&
! opfamily == pk->pk_opfamily &&
! strategy == pk->pk_strategy &&
! nulls_first == pk->pk_nulls_first)
! return pk;
! }
!
! /*
! * Be sure canonical pathkeys are allocated in the main planning context.
! * Not an issue in normal planning, but it is for GEQO.
! */
! oldcontext = MemoryContextSwitchTo(root->planner_cxt);
!
! pk = makePathKey(eclass, opfamily, strategy, nulls_first);
! root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
!
! MemoryContextSwitchTo(oldcontext);
}
+ else
+ {
+ uint32 hashval;
+ bool found;
! /*
! * Be sure canonical pathkeys are allocated in the main planning context.
! * Not an issue in normal planning, but it is for GEQO.
! */
! oldcontext = MemoryContextSwitchTo(root->planner_cxt);
! pk = makePathKey(eclass, opfamily, strategy, nulls_first);
! hashval = pk_hash(pk, sizeof(PathKey));
!
! pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found);
!
! if (found)
! {
! pfree(pk);
! pk = pk_found;
! }
! else
! {
! root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
! hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found);
! }
!
! MemoryContextSwitchTo(oldcontext);
! }
return pk;
}
diff -dcrpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c
*** postgresql.orig/src/backend/optimizer/plan/planmain.c 2010-10-08 11:04:23.000000000 +0200
--- postgresql.1/src/backend/optimizer/plan/planmain.c 2010-10-28 12:37:47.000000000 +0200
***************
*** 27,32 ****
--- 27,33 ----
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
+ #include "utils/hsearch.h"
#include "utils/selfuncs.h"
*************** query_planner(PlannerInfo *root, List *t
*** 118,123 ****
--- 119,129 ----
* something like "SELECT 2+2 ORDER BY 1".
*/
root->canon_pathkeys = NIL;
+ if (root->canon_pathkeys_hash)
+ {
+ hash_destroy(root->canon_pathkeys_hash);
+ root->canon_pathkeys_hash = NULL;
+ }
root->query_pathkeys = canonicalize_pathkeys(root,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
*************** query_planner(PlannerInfo *root, List *t
*** 146,151 ****
--- 152,162 ----
root->join_rel_level = NULL;
root->join_cur_level = 0;
root->canon_pathkeys = NIL;
+ if (root->canon_pathkeys_hash)
+ {
+ hash_destroy(root->canon_pathkeys_hash);
+ root->canon_pathkeys_hash = NULL;
+ }
root->left_join_clauses = NIL;
root->right_join_clauses = NIL;
root->full_join_clauses = NIL;
diff -dcrpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h
*** postgresql.orig/src/include/nodes/relation.h 2010-10-15 10:31:47.000000000 +0200
--- postgresql.1/src/include/nodes/relation.h 2010-10-28 12:37:47.000000000 +0200
*************** typedef struct PlannerInfo
*** 159,166 ****
--- 159,168 ----
List *cte_plan_ids; /* per-CTE-item list of subplan IDs */
List *eq_classes; /* list of active EquivalenceClasses */
+ struct HTAB *eq_classes_hash; /* optional hashtable for equivalence classes */
List *canon_pathkeys; /* list of "canonical" PathKeys */
+ struct HTAB *canon_pathkeys_hash; /* optional hashtable for "canonical" PathKeys */
List *left_join_clauses; /* list of RestrictInfos for
* mergejoinable outer join clauses
Binary files postgresql.orig/src/test/regress/gmon.out and postgresql.1/src/test/regress/gmon.out differ
Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers