Hello, This is a new version of the identity projection patch. I reverted projectionInfo and ExecBuildProjectionInfo. Identity projection is now recognized directly in ExecGroup, ExecResult, and ExecWindowAgg. The nodeAgg change is reverted because I couldn't make it sane.
The following is the result of the performance test posted before, in order to show the source of the gain. regards, -- -- Kyotaro Horiguchi NTT Open Source Software Center At Fri, 05 Oct 2012 16:04:16 +0900, Kyotaro HORIGUCHI wrote in <20121005.160416.256387378.horiguchi.kyot...@lab.ntt.co.jp> > > Although I said as following, the gain seems a bit larger... I'll > > recheck the testing conditions... > > I had inspected more precisely on two aspects magnifying the > effect of this patch by putting 300 columns into the table. > > > First, explain analyze says the difference caused by this patch > is only in the actual time of Result node. > > orig$ psql -c 'explain analyze select * from parenta' > QUERY PLAN > -------------------------------------------------------------------------- > Result (cost=0.00..176667.00 rows=1000001 width=1202) > (actual time=0.013.. *2406.792* rows=1000000 loops=1) > -> Append (cost=0.00..176667.00 rows=1000001 width=1202) > (actual time=0.011..412.749 rows=1000000 loops=1) > -> Seq Scan on parenta (cost=0.00..0.00 rows=1 width=1228) > (actual time=0.001..0.001 rows=0 loops=1) > -> Seq Scan on childa000 parenta > (cost=0.00..176667.00 rows=1000000 width=1202) > (actual time=0.009..334.633 rows=1000000 loops=1) > Total runtime: 2446.474 ms > (5 rows) > > patched$ psql -c 'explain analyze select * from parenta' > QUERY PLAN > -------------------------------------------------------------------------- > Result (cost=0.00..176667.00 rows=1000001 width=1202) > (actual time=0.011.. 
*507.239* rows=1000000 loops=1) > -> Append (cost=0.00..176667.00 rows=1000001 width=1202) > (actual time=0.011..419.420 rows=1000000 loops=1) > -> Seq Scan on parenta (cost=0.00..0.00 rows=1 width=1228) > (actual time=0.000..0.000 rows=0 loops=1) > -> Seq Scan on childa000 parenta > (cost=0.00..176667.00 rows=1000000 width=1202) > (actual time=0.010..335.721 rows=1000000 loops=1) > Total runtime: 545.879 ms > (5 rows) > > > Second, the results of configure --enable-profiling show that > the exectime of ExecProject changes greatly. This is consistent > with what explain showed. > > orig: > > % cumulative self self total > > time seconds seconds calls s/call s/call name > > 60.29 1.26 1.26 1000005 0.00 0.00 slot_deform_tuple > !> 30.14 1.89 0.63 1000000 0.00 0.00 ExecProject > > 3.35 1.96 0.07 3000004 0.00 0.00 ExecProcNode > > 0.96 1.98 0.02 1000002 0.00 0.00 ExecScan > > 0.96 2.00 0.02 166379 0.00 0.00 TerminateBufferIO > > 0.48 2.01 0.01 3000004 0.00 0.00 InstrStartNode > > 0.48 2.02 0.01 3000004 0.00 0.00 InstrStopNode > !> 0.48 2.03 0.01 1000001 0.00 0.00 ExecResult > > 0.48 2.04 0.01 830718 0.00 0.00 LWLockAcquire > > 0.48 2.05 0.01 506834 0.00 0.00 > hash_search_with_hash_value > > 0.48 2.06 0.01 341656 0.00 0.00 LockBuffer > > 0.48 2.07 0.01 168383 0.00 0.00 ReadBuffer_common > > 0.48 2.08 0.01 4 0.00 0.00 InstrEndLoop > > 0.48 2.09 0.01 > ExecCleanTargetListLength > > 0.00 2.09 0.00 2000005 0.00 0.00 MemoryContextReset > > patched: > > % cumulative self self total > > time seconds seconds calls ms/call ms/call name > > 23.08 0.03 0.03 3000004 0.00 0.00 ExecProcNode > > 15.38 0.05 0.02 1000002 0.00 0.00 heapgettup_pagemode > > 15.38 0.07 0.02 830718 0.00 0.00 LWLockAcquire > > 7.69 0.08 0.01 2000005 0.00 0.00 MemoryContextReset > > 7.69 0.09 0.01 1000002 0.00 0.00 ExecScan > > 7.69 0.10 0.01 1000000 0.00 0.00 ExecStoreTuple > > 7.69 0.11 0.01 841135 0.00 0.00 LWLockRelease > > 7.69 0.12 0.01 168383 0.00 0.00 ReadBuffer_common > > 7.69 0.13 0.01 168383 0.00 
0.00 UnpinBuffer > > 0.00 0.13 0.00 3000004 0.00 0.00 InstrStartNode > ... > !> 0.00 0.13 0.00 1000001 0.00 0.00 ExecResult > !> 0.00 0.13 0.00 1000000 0.00 0.00 ExecProject ==============================
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index a8a1fe6..38037f9 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -110,7 +110,10 @@ ExecGroup(GroupState *node) TupleTableSlot *result; ExprDoneCond isDone; - result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); + if (node->ss.ps.ps_ProjInfo) + result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); + else /* Assign outertuple for identity projection */ + result = econtext->ecxt_outertuple; if (isDone != ExprEndResult) { @@ -173,7 +176,10 @@ ExecGroup(GroupState *node) TupleTableSlot *result; ExprDoneCond isDone; - result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); + if (node->ss.ps.ps_ProjInfo) + result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); + else /* Assign outertuple for identity projection */ + result = econtext->ecxt_outertuple; if (isDone != ExprEndResult) { @@ -244,7 +250,10 @@ ExecInitGroup(Group *node, EState *estate, int eflags) * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&grpstate->ss.ps); - ExecAssignProjectionInfo(&grpstate->ss.ps, NULL); + if (node->plan.tlist_lower_congruent) + grpstate->ss.ps.ps_ProjInfo = NULL; + else + ExecAssignProjectionInfo(&grpstate->ss.ps, NULL); grpstate->ss.ps.ps_TupFromTlist = false; diff --git a/src/backend/executor/nodeResult.c b/src/backend/executor/nodeResult.c index b51efd8..4a129d6 100644 --- a/src/backend/executor/nodeResult.c +++ b/src/backend/executor/nodeResult.c @@ -152,7 +152,10 @@ ExecResult(ResultState *node) * the projection produces an empty set, in which case we must loop * back to see if there are more outerPlan tuples. 
*/ - resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone); + if (node->ps.ps_ProjInfo) + resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone); + else /* Assign outertuple for identity projection */ + resultSlot = econtext->ecxt_outertuple; if (isDone != ExprEndResult) { @@ -261,7 +264,10 @@ ExecInitResult(Result *node, EState *estate, int eflags) * initialize tuple type and projection info */ ExecAssignResultTypeFromTL(&resstate->ps); - ExecAssignProjectionInfo(&resstate->ps, NULL); + if (node->plan.tlist_lower_congruent) + resstate->ps.ps_ProjInfo = NULL; + else + ExecAssignProjectionInfo(&resstate->ps, NULL); return resstate; } diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index ade9b57..d34c45a 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -1373,7 +1373,10 @@ restart: * evaluated with respect to that row. */ econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot; - result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone); + if (winstate->ss.ps.ps_ProjInfo) + result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone); + else /* Assign outertuple for identity projection */ + result = econtext->ecxt_outertuple; if (isDone == ExprEndResult) { @@ -1490,7 +1493,10 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) * Initialize result tuple type and projection info. 
*/ ExecAssignResultTypeFromTL(&winstate->ss.ps); - ExecAssignProjectionInfo(&winstate->ss.ps, NULL); + if (node->plan.tlist_lower_congruent) + winstate->ss.ps.ps_ProjInfo = NULL; + else + ExecAssignProjectionInfo(&winstate->ss.ps, NULL); winstate->ss.ps.ps_TupFromTlist = false; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ccd69fc..39d54ce 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1184,6 +1184,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) indexed_tlist *subplan_itlist; List *output_targetlist; ListCell *l; + int nmatch = 0; subplan_itlist = build_tlist_index(subplan->targetlist); @@ -1214,12 +1215,25 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) subplan_itlist, OUTER_VAR, rtoffset); + + if (IsA(newexpr, Var) && ((Var*)newexpr)->varattno == nmatch + 1) + nmatch++; + tle = flatCopyTargetEntry(tle); tle->expr = (Expr *) newexpr; output_targetlist = lappend(output_targetlist, tle); } - plan->targetlist = output_targetlist; + /* + * Directly refer to the lower tuple slot on projection if the all elements + * in target list exactly correspond to the ones in the lower tlist. + */ + plan->tlist_lower_congruent = + (nmatch == list_length(plan->targetlist) && + nmatch == list_length(subplan->targetlist)); + + plan->targetlist = output_targetlist; + plan->qual = (List *) fix_upper_expr(root, (Node *) plan->qual, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index fb9a863..9e7729c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -106,6 +106,7 @@ typedef struct Plan * Common structural data for all Plan types. */ List *targetlist; /* target list to be computed at this node */ + bool tlist_lower_congruent; /* target list is lower-congruent */ List *qual; /* implicitly-ANDed qual conditions */ struct Plan *lefttree; /* input plan tree(s) */ struct Plan *righttree;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers