On 2018/06/13 23:39, Tom Lane wrote:
> Robert Haas <[email protected]> writes:
>> Seems reasonable. Really, I think we should look for a way to hang
>> onto the relation at the point where it's originally opened and locked
>> instead of reopening it here. But that's probably more invasive than
>> we can really justify right at the moment, and I think this is a step
>> in a good direction.
>
> The existing coding there makes me itch a bit, because there's only a
> rather fragile line of reasoning justifying the assumption that there is a
> pre-existing lock at all. So I'd be in favor of what you suggest just to
> get rid of the "open(NoLock)" hazard. But I agree that it'd be rather
> invasive and right now is probably not the time for it.
I had sent a patch to try to get rid of the open(NoLock) there a couple of
months ago [1]. The idea was to both lock and open the relation in
ExecNonLeafAppendTables, which is the first time all partitioned tables in
a given Append node are locked for execution. Also, the patch makes it a
responsibility of ExecEndAppend to release the relcache pins, so the
recently added ExecDestroyPartitionPruneState would not be needed.
Attached is a rebased version of that patch if there is interest in it.
Thanks,
Amit
[1]
https://www.postgresql.org/message-id/0b361a22-f995-e15c-a385-6d1b72dd0d13%40lab.ntt.co.jp
diff --git a/src/backend/executor/execPartition.c
b/src/backend/executor/execPartition.c
index 7a4665cc4e..ea6b05934b 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1401,7 +1401,8 @@ adjust_partition_tlist(List *tlist, TupleConversionMap
*map)
* in each PartitionPruneInfo.
*/
PartitionPruneState *
-ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
+ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo,
+ Relation
*partitioned_rels)
{
PartitionPruneState *prunestate;
PartitionPruningData *prunedata;
@@ -1440,6 +1441,7 @@ ExecCreatePartitionPruneState(PlanState *planstate, List
*partitionpruneinfo)
PartitionPruneInfo *pinfo = castNode(PartitionPruneInfo,
lfirst(lc));
PartitionPruningData *pprune = &prunedata[i];
PartitionPruneContext *context = &pprune->context;
+ Relation rel;
PartitionDesc partdesc;
PartitionKey partkey;
int partnatts;
@@ -1460,17 +1462,11 @@ ExecCreatePartitionPruneState(PlanState *planstate,
List *partitionpruneinfo)
/* present_parts is also subject to later modification */
pprune->present_parts = bms_copy(pinfo->present_parts);
- /*
- * We need to hold a pin on the partitioned table's relcache
entry so
- * that we can rely on its copies of the table's partition key
and
- * partition descriptor. We need not get a lock though; one
should
- * have been acquired already by InitPlan or
- * ExecLockNonLeafAppendTables.
- */
- context->partrel = relation_open(pinfo->reloid, NoLock);
-
- partkey = RelationGetPartitionKey(context->partrel);
- partdesc = RelationGetPartitionDesc(context->partrel);
+ Assert(partitioned_rels[i] != NULL);
+ rel = partitioned_rels[i];
+ Assert(RelationGetRelid(rel) == pinfo->reloid);
+ partkey = RelationGetPartitionKey(rel);
+ partdesc = RelationGetPartitionDesc(rel);
n_steps = list_length(pinfo->pruning_steps);
context->strategy = partkey->strategy;
@@ -1546,26 +1542,6 @@ ExecCreatePartitionPruneState(PlanState *planstate, List
*partitionpruneinfo)
}
/*
- * ExecDestroyPartitionPruneState
- * Release resources at plan shutdown.
- *
- * We don't bother to free any memory here, since the whole executor context
- * will be going away shortly. We do need to release our relcache pins.
- */
-void
-ExecDestroyPartitionPruneState(PartitionPruneState *prunestate)
-{
- int i;
-
- for (i = 0; i < prunestate->num_partprunedata; i++)
- {
- PartitionPruningData *pprune = &prunestate->partprunedata[i];
-
- relation_close(pprune->context.partrel, NoLock);
- }
-}
-
-/*
* ExecFindInitialMatchingSubPlans
* Identify the set of subplans that cannot be eliminated by
initial
* pruning (disregarding any pruning constraints involving
PARAM_EXEC
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index b963cae730..87809054ed 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -858,22 +858,52 @@ ShutdownExprContext(ExprContext *econtext, bool isCommit)
/*
* ExecLockNonLeafAppendTables
*
- * Locks, if necessary, the tables indicated by the RT indexes contained in
- * the partitioned_rels list. These are the non-leaf tables in the partition
- * tree controlled by a given Append or MergeAppend node.
+ * Opens and/or locks, if necessary, the tables indicated by the RT indexes
+ * contained in the partitioned_rels list. These are the non-leaf tables in
+ * the partition tree controlled by a given Append or MergeAppend node.
*/
void
-ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
+ExecLockNonLeafAppendTables(PlanState *planstate,
+ EState *estate,
+ List *partitioned_rels)
{
PlannedStmt *stmt = estate->es_plannedstmt;
ListCell *lc;
+ int i;
+ if (partitioned_rels == NIL)
+ return;
+
+ /*
+ * If we're going to be performing pruning, allocate space for Relation
+ * pointers to be used later when setting up partition pruning state in
+ * ExecCreatePartitionPruneState.
+ */
+ if (IsA(planstate, AppendState))
+ {
+ AppendState *appendstate = (AppendState *) planstate;
+ Append *node = (Append *) planstate->plan;
+
+ if (node->part_prune_infos != NIL)
+ {
+ Assert(list_length(node->part_prune_infos) ==
+ list_length(partitioned_rels));
+ appendstate->partitioned_rels = (Relation *)
+
palloc(sizeof(Relation) *
+
list_length(node->part_prune_infos));
+ appendstate->num_partitioned_rels =
+
list_length(node->part_prune_infos);
+ }
+ }
+
+ i = 0;
foreach(lc, partitioned_rels)
{
ListCell *l;
Index rti = lfirst_int(lc);
bool is_result_rel = false;
Oid relid = getrelid(rti,
estate->es_range_table);
+ int lockmode;
/* If this is a result relation, already locked in InitPlan */
foreach(l, stmt->nonleafResultRelations)
@@ -903,9 +933,39 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState
*estate)
}
if (rc && RowMarkRequiresRowShareLock(rc->markType))
- LockRelationOid(relid, RowShareLock);
+ lockmode = RowShareLock;
else
- LockRelationOid(relid, AccessShareLock);
+ lockmode = AccessShareLock;
+ switch (nodeTag(planstate))
+ {
+ /*
+ * We need to also hold a pin on the
partitioned table's
+ * relcache entry so that we can rely on its
copies of the
+ * table's partition key and partition
descriptor, which
+ * are used when setting up partition pruning
state.
+ */
+ case T_AppendState:
+ {
+ AppendState *appendstate =
(AppendState *) planstate;
+
+ if
(appendstate->partitioned_rels)
+
appendstate->partitioned_rels[i] =
+
heap_open(relid, lockmode);
+ else
+ LockRelationOid(relid,
lockmode);
+ i++;
+ }
+
+ /* Just lock here; there is no pruning support.
*/
+ case T_MergeAppendState:
+ LockRelationOid(relid, lockmode);
+ break;
+
+ default:
+ elog(ERROR, "invalid PlanState node:
%d",
+ nodeTag(planstate));
+ break;
+ }
}
}
}
diff --git a/src/backend/executor/nodeAppend.c
b/src/backend/executor/nodeAppend.c
index 5ce4fb43e1..5ce4601a35 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -113,18 +113,19 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
Assert(!(eflags & EXEC_FLAG_MARK));
/*
- * Lock the non-leaf tables in the partition tree controlled by this
node.
- * It's a no-op for non-partitioned parent tables.
- */
- ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
-
- /*
* create new AppendState for our append node
*/
appendstate->ps.plan = (Plan *) node;
appendstate->ps.state = estate;
appendstate->ps.ExecProcNode = ExecAppend;
+ /*
+ * Lock the non-leaf tables in the partition tree controlled by this
node.
+ * It's a no-op for non-partitioned parent tables.
+ */
+ ExecLockNonLeafAppendTables((PlanState *) appendstate, estate,
+
node->partitioned_rels);
+
/* Let choose_next_subplan_* function handle setting the first subplan
*/
appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
@@ -137,8 +138,12 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
ExecAssignExprContext(estate, &appendstate->ps);
/* Create the working data structure for pruning. */
+
+ /* ExecLockNonLeafAppendTables must have set this up. */
+ Assert(appendstate->partitioned_rels != NULL);
prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
-
node->part_prune_infos);
+
node->part_prune_infos,
+
appendstate->partitioned_rels);
appendstate->as_prune_state = prunestate;
/* Perform an initial partition prune, if required. */
@@ -318,6 +323,7 @@ ExecEndAppend(AppendState *node)
PlanState **appendplans;
int nplans;
int i;
+ int num_partitioned_rels;
/*
* get information from the node
@@ -326,16 +332,19 @@ ExecEndAppend(AppendState *node)
nplans = node->as_nplans;
/*
+ * Close partitioned rels that we may have opened for partition
+ * pruning.
+ */
+ num_partitioned_rels = node->num_partitioned_rels;
+ Assert(node->partitioned_rels != NULL || num_partitioned_rels == 0);
+ for (i = 0; i < num_partitioned_rels; i++)
+ heap_close(node->partitioned_rels[i], NoLock);
+
+ /*
* shut down each of the subscans
*/
for (i = 0; i < nplans; i++)
ExecEndNode(appendplans[i]);
-
- /*
- * release any resources associated with run-time pruning
- */
- if (node->as_prune_state)
- ExecDestroyPartitionPruneState(node->as_prune_state);
}
void
diff --git a/src/backend/executor/nodeMergeAppend.c
b/src/backend/executor/nodeMergeAppend.c
index 118f4ef07d..6f28002ce2 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -76,7 +76,8 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int
eflags)
* Lock the non-leaf tables in the partition tree controlled by this
node.
* It's a no-op for non-partitioned parent tables.
*/
- ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
+ ExecLockNonLeafAppendTables((PlanState *) mergestate, estate,
+
node->partitioned_rels);
/*
* Set up empty vector of subplan states
diff --git a/src/include/executor/execPartition.h
b/src/include/executor/execPartition.h
index 862bf65060..33c3f73e78 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -209,8 +209,8 @@ extern HeapTuple
ConvertPartitionTupleSlot(TupleConversionMap *map,
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
PartitionTupleRouting *proute);
extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,
- List
*partitionpruneinfo);
-extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate);
+ List
*partitionpruneinfo,
+ Relation
*partitioned_rels);
extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState
*prunestate,
int nsubplans);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index f82b51667f..2b5eec9896 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -524,7 +524,9 @@ extern void UnregisterExprContextCallback(ExprContext
*econtext,
ExprContextCallbackFunction function,
Datum arg);
-extern void ExecLockNonLeafAppendTables(List *partitioned_rels, EState
*estate);
+extern void ExecLockNonLeafAppendTables(PlanState *planstate,
+ EState *estate,
+ List *partitioned_rels);
extern Datum GetAttributeByName(HeapTupleHeader tuple, const char *attname,
bool *isNull);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index da7f52cab0..a20d94fd9f 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1092,6 +1092,8 @@ struct AppendState
* the first partial plan */
ParallelAppendState *as_pstate; /* parallel coordination info */
Size pstate_len; /* size of parallel
coordination info */
+ Relation *partitioned_rels;
+ int num_partitioned_rels; /* number of entries in
above array */
struct PartitionPruneState *as_prune_state;
Bitmapset *as_valid_subplans;
bool (*choose_next_subplan) (AppendState *);