"Lawrence, Ramon" <[email protected]> writes:
> Attached is a patch that will disable the physical-tlist optimization
> for hash join if the number of batches is greater than 1. The patch and
> performance results were created by Michael Henderson (graduate
> student).
I've applied the attached modified version of this patch.
regards, tom lane
Index: src/backend/nodes/outfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v
retrieving revision 1.355
diff -c -r1.355 outfuncs.c
*** src/backend/nodes/outfuncs.c 21 Mar 2009 00:04:39 -0000 1.355
--- src/backend/nodes/outfuncs.c 26 Mar 2009 15:19:43 -0000
***************
*** 1448,1453 ****
--- 1448,1454 ----
_outJoinPathInfo(str, (JoinPath *) node);
WRITE_NODE_FIELD(path_hashclauses);
+ WRITE_INT_FIELD(num_batches);
}
static void
Index: src/backend/optimizer/path/costsize.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v
retrieving revision 1.205
diff -c -r1.205 costsize.c
*** src/backend/optimizer/path/costsize.c 21 Mar 2009 00:04:39 -0000
1.205
--- src/backend/optimizer/path/costsize.c 26 Mar 2009 15:19:43 -0000
***************
*** 1880,1885 ****
--- 1880,1887 ----
&numbatches,
&num_skew_mcvs);
virtualbuckets = (double) numbuckets *(double) numbatches;
+ /* mark the path with estimated # of batches */
+ path->num_batches = numbatches;
/*
* Determine bucketsize fraction for inner relation. We use the
smallest
Index: src/backend/optimizer/plan/createplan.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v
retrieving revision 1.256
diff -c -r1.256 createplan.c
*** src/backend/optimizer/plan/createplan.c 21 Mar 2009 00:04:39 -0000
1.256
--- src/backend/optimizer/plan/createplan.c 26 Mar 2009 15:19:44 -0000
***************
*** 1910,1915 ****
--- 1910,1919 ----
/* We don't want any excess columns in the hashed tuples */
disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
+ /* If we expect batching, suppress excess columns in outer tuples too */
+ if (best_path->num_batches > 1)
+ disuse_physical_tlist(outer_plan,
best_path->jpath.outerjoinpath);
+
/*
* If there is a single join clause and we can identify the outer
* variable as a simple column reference, supply its identity for
Index: src/backend/optimizer/util/pathnode.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v
retrieving revision 1.150
diff -c -r1.150 pathnode.c
*** src/backend/optimizer/util/pathnode.c 27 Feb 2009 00:06:27 -0000
1.150
--- src/backend/optimizer/util/pathnode.c 26 Mar 2009 15:19:44 -0000
***************
*** 1480,1488 ****
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
! /* A hashjoin never has pathkeys, since its ordering is unpredictable */
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
cost_hashjoin(pathnode, root, sjinfo);
--- 1480,1499 ----
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
! /*
! * A hashjoin never has pathkeys, since its output ordering is
! * unpredictable due to possible batching. XXX If the inner relation is
! * small enough, we could instruct the executor that it must not batch,
! * and then we could assume that the output inherits the outer
relation's
! * ordering, which might save a sort step. However there is
considerable
! * downside if our estimate of the inner relation size is badly off.
! * For the moment we don't risk it. (Note also that if we wanted to
take
! * this seriously, joinpath.c would have to consider many more paths for
! * the outer rel than it does now.)
! */
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
+ /* cost_hashjoin will fill in pathnode->num_batches */
cost_hashjoin(pathnode, root, sjinfo);
Index: src/include/nodes/relation.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/nodes/relation.h,v
retrieving revision 1.170
diff -c -r1.170 relation.h
*** src/include/nodes/relation.h 5 Mar 2009 23:06:45 -0000 1.170
--- src/include/nodes/relation.h 26 Mar 2009 15:19:44 -0000
***************
*** 845,850 ****
--- 845,851 ----
{
JoinPath jpath;
List *path_hashclauses; /* join clauses used for
hashing */
+ int num_batches; /* number of
batches expected */
} HashPath;
/*
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers