From d1db7218213503a91ccb4b56034bae1c973a9a2e Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <i.gladyshev@posgrespro.ru>
Date: Thu, 5 Aug 2021 00:13:22 +0300
Subject: [PATCH] Partition key update in foreign table

Implements update of partition key for rows in foreign tables via foreign
delete + insert. This requires additional FDW apis: BeginForeignDelete
and EndForeignDelete, mainly because BeginForeignModify is, in any case,
called in the beginning of foreign update and we only know that we will
need separate remote delete+insert at ExecRun after checking partition
constraints. So postgresBeginForeignDelete keeps a separate state in
aux_delete_fmstate not to conflict with BeginForeignModify.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  69 ++++++----
 contrib/postgres_fdw/postgres_fdw.c           | 125 +++++++++++++++++-
 contrib/postgres_fdw/sql/postgres_fdw.sql     |  13 +-
 src/backend/executor/execMain.c               |   1 +
 src/backend/executor/nodeModifyTable.c        |  29 +++-
 src/include/foreign/fdwapi.h                  |   8 ++
 src/include/nodes/execnodes.h                 |   2 +
 7 files changed, 212 insertions(+), 35 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index e3ee30f1aaf..d591f1a28aa 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8122,35 +8122,44 @@ select tableoid::regclass, * FROM locp;
  locp     | 2 | qux
 (1 row)
 
--- It's not allowed to move a row from a partition that is foreign to another
-update utrtest set a = 2 where b = 'foo' returning *;
-ERROR:  new row for relation "loct" violates check constraint "loct_a_check"
-DETAIL:  Failing row contains (2, foo).
-CONTEXT:  remote SQL command: UPDATE public.loct SET a = 2 WHERE ((b = 'foo'::text)) RETURNING a, b
--- But the reverse is allowed
-update utrtest set a = 1 where b = 'qux' returning *;
+-- Moving a row from a foreign partition to local with partition key as qual
+update utrtest set a = 1 where a = 2 returning *;
+ a |  b  
+---+-----
+ 1 | qux
+(1 row)
+
+-- From local to foreign with partition key as qual
+update utrtest set a = 2 where a = 1 and b = 'foo' returning *;
+ a |  b  
+---+-----
+ 2 | foo
+(1 row)
+
+-- Doesn't work without partition key in qual
+update utrtest set a = 1 where b = 'foo' returning *;
 ERROR:  cannot route tuples into foreign table to be updated "remp"
 select tableoid::regclass, * FROM utrtest;
  tableoid | a |  b  
 ----------+---+-----
- remp     | 1 | foo
- locp     | 2 | qux
+ remp     | 1 | qux
+ locp     | 2 | foo
 (2 rows)
 
 select tableoid::regclass, * FROM remp;
  tableoid | a |  b  
 ----------+---+-----
- remp     | 1 | foo
+ remp     | 1 | qux
 (1 row)
 
 select tableoid::regclass, * FROM locp;
  tableoid | a |  b  
 ----------+---+-----
- locp     | 2 | qux
+ locp     | 2 | foo
 (1 row)
 
 -- The executor should not let unexercised FDWs shut down
-update utrtest set a = 1 where b = 'foo';
+update utrtest set a = 1 where b = 'qux';
 -- Test that remote triggers work with update tuple routing
 create trigger loct_br_insert_trigger before insert on loct
 	for each row execute procedure br_insert_trigfunc();
@@ -8159,19 +8168,21 @@ insert into utrtest values (2, 'qux');
 -- Check case where the foreign partition is a subplan target rel
 explain (verbose, costs off)
 update utrtest set a = 1 where a = 1 or a = 2 returning *;
-                                             QUERY PLAN                                             
-----------------------------------------------------------------------------------------------------
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
  Update on public.utrtest
    Output: utrtest_1.a, utrtest_1.b
    Foreign Update on public.remp utrtest_1
+     Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
    Update on public.locp utrtest_2
    ->  Append
-         ->  Foreign Update on public.remp utrtest_1
-               Remote SQL: UPDATE public.loct SET a = 1 WHERE (((a = 1) OR (a = 2))) RETURNING a, b
+         ->  Foreign Scan on public.remp utrtest_1
+               Output: 1, utrtest_1.tableoid, utrtest_1.ctid, utrtest_1.*
+               Remote SQL: SELECT a, b, ctid FROM public.loct WHERE (((a = 1) OR (a = 2))) FOR UPDATE
          ->  Seq Scan on public.locp utrtest_2
                Output: 1, utrtest_2.tableoid, utrtest_2.ctid, NULL::record
                Filter: ((utrtest_2.a = 1) OR (utrtest_2.a = 2))
-(10 rows)
+(12 rows)
 
 -- The new values are concatenated with ' triggered !'
 update utrtest set a = 1 where a = 1 or a = 2 returning *;
@@ -8208,18 +8219,20 @@ insert into utrtest values (2, 'qux');
 -- with a direct modification plan
 explain (verbose, costs off)
 update utrtest set a = 1 returning *;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Update on public.utrtest
    Output: utrtest_1.a, utrtest_1.b
    Foreign Update on public.remp utrtest_1
+     Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
    Update on public.locp utrtest_2
    ->  Append
-         ->  Foreign Update on public.remp utrtest_1
-               Remote SQL: UPDATE public.loct SET a = 1 RETURNING a, b
+         ->  Foreign Scan on public.remp utrtest_1
+               Output: 1, utrtest_1.tableoid, utrtest_1.ctid, utrtest_1.*
+               Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
          ->  Seq Scan on public.locp utrtest_2
                Output: 1, utrtest_2.tableoid, utrtest_2.ctid, NULL::record
-(9 rows)
+(11 rows)
 
 update utrtest set a = 1 returning *;
 ERROR:  cannot route tuples into foreign table to be updated "remp"
@@ -8268,18 +8281,20 @@ insert into utrtest values (3, 'xyzzy');
 -- with a direct modification plan
 explain (verbose, costs off)
 update utrtest set a = 3 returning *;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Update on public.utrtest
    Output: utrtest_1.a, utrtest_1.b
    Update on public.locp utrtest_1
    Foreign Update on public.remp utrtest_2
+     Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
    ->  Append
          ->  Seq Scan on public.locp utrtest_1
                Output: 3, utrtest_1.tableoid, utrtest_1.ctid, NULL::record
-         ->  Foreign Update on public.remp utrtest_2
-               Remote SQL: UPDATE public.loct SET a = 3 RETURNING a, b
-(9 rows)
+         ->  Foreign Scan on public.remp utrtest_2
+               Output: 3, utrtest_2.tableoid, utrtest_2.ctid, utrtest_2.*
+               Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
+(11 rows)
 
 update utrtest set a = 3 returning *; -- ERROR
 ERROR:  cannot route tuples into foreign table to be updated "remp"
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 9d443baf02a..bb945098ee4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -209,6 +209,12 @@ typedef struct PgFdwModifyState
 	/* for update row movement if subplan result rel */
 	struct PgFdwModifyState *aux_fmstate;	/* foreign-insert state, if
 											 * created */
+	struct PgFdwModifyState *aux_delete_fmstate;	/* foreign delete state.
+													 * normally used when
+													 * tuple routing with
+													 * foreign delete takes
+													 * place and fmstate is
+													 * already taken. */
 } PgFdwModifyState;
 
 /*
@@ -375,6 +381,10 @@ static void postgresBeginForeignInsert(ModifyTableState *mtstate,
 									   ResultRelInfo *resultRelInfo);
 static void postgresEndForeignInsert(EState *estate,
 									 ResultRelInfo *resultRelInfo);
+static void postgresBeginForeignDelete(ModifyTableState *mtstate,
+									   ResultRelInfo *resultRelInfo);
+static void postgresEndForeignDelete(EState *estate,
+									 ResultRelInfo *resultRelInfo);
 static int	postgresIsForeignRelUpdatable(Relation rel);
 static bool postgresPlanDirectModify(PlannerInfo *root,
 									 ModifyTable *plan,
@@ -571,6 +581,8 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
 	routine->EndForeignModify = postgresEndForeignModify;
 	routine->BeginForeignInsert = postgresBeginForeignInsert;
 	routine->EndForeignInsert = postgresEndForeignInsert;
+	routine->BeginForeignDelete = postgresBeginForeignDelete;
+	routine->EndForeignDelete = postgresEndForeignDelete;
 	routine->IsForeignRelUpdatable = postgresIsForeignRelUpdatable;
 	routine->PlanDirectModify = postgresPlanDirectModify;
 	routine->BeginDirectModify = postgresBeginDirectModify;
@@ -2078,12 +2090,19 @@ postgresExecForeignDelete(EState *estate,
 						  TupleTableSlot *slot,
 						  TupleTableSlot *planSlot)
 {
+	PgFdwModifyState *fmstate = resultRelInfo->ri_FdwState;
 	TupleTableSlot **rslot;
 	int			numSlots = 1;
 
+	if (fmstate->aux_delete_fmstate)
+		resultRelInfo->ri_FdwState = fmstate->aux_delete_fmstate;
+
 	rslot = execute_foreign_modify(estate, resultRelInfo, CMD_DELETE,
 								   &slot, &planSlot, &numSlots);
 
+	if (fmstate->aux_delete_fmstate)
+		resultRelInfo->ri_FdwState = fmstate;
+
 	return rslot ? rslot[0] : NULL;
 }
 
@@ -2259,6 +2278,106 @@ postgresEndForeignInsert(EState *estate,
 	finish_foreign_modify(fmstate);
 }
 
+/*
+ * postgresBeginForeignDelete
+ *     Setup state for delete on foreign table.
+ *     Primarily needed when tuple routing from a foreign table is required.
+ *     Sets up state in aux_delete_state if necessary
+ */
+static void
+postgresBeginForeignDelete(ModifyTableState *mtstate,
+						   ResultRelInfo *resultRelInfo)
+{
+	ModifyTable *plan = castNode(ModifyTable, mtstate->ps.plan);
+	EState	   *estate = mtstate->ps.state;
+	PgFdwModifyState *fmstate;
+	StringInfoData sql;
+	Index		resultRelation;
+	Relation	rel = resultRelInfo->ri_RelationDesc;
+	RangeTblEntry *rte;
+	List	   *retrievedAttrs;
+
+	initStringInfo(&sql);
+
+	/*
+	 * If the foreign table is a partition that doesn't have a corresponding
+	 * RTE entry, we need to create a new RTE describing the foreign table for
+	 * use by deparseDeleteSql and create_foreign_modify() below, after first
+	 * copying the parent's RTE and modifying some fields to describe the
+	 * foreign partition to work on. However, if this is invoked by UPDATE,
+	 * the existing RTE may already correspond to this partition if it is one
+	 * of the UPDATE subplan target rels; in that case, we can just use the
+	 * existing RTE as-is.
+	 */
+	if (resultRelInfo->ri_RangeTableIndex == 0)
+	{
+		ResultRelInfo *rootResultRelInfo = resultRelInfo->ri_RootResultRelInfo;
+
+		rte = exec_rt_fetch(rootResultRelInfo->ri_RangeTableIndex, estate);
+		rte = copyObject(rte);
+		rte->relid = RelationGetRelid(rel);
+		rte->relkind = RELKIND_FOREIGN_TABLE;
+
+		/*
+		 * For UPDATE, we must use the RT index of the first subplan target
+		 * rel's RTE, because the core code would have built expressions for
+		 * the partition, such as RETURNING, using that RT index as varno of
+		 * Vars contained in those expressions.
+		 */
+		if (plan && plan->operation == CMD_UPDATE &&
+			rootResultRelInfo->ri_RangeTableIndex == plan->rootRelation)
+			resultRelation = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+		else
+			resultRelation = rootResultRelInfo->ri_RangeTableIndex;
+	}
+	else
+	{
+		resultRelation = resultRelInfo->ri_RangeTableIndex;
+		rte = exec_rt_fetch(resultRelation, estate);
+	}
+
+	deparseDeleteSql(&sql, rte, resultRelation, rel, NIL, &retrievedAttrs);
+
+	fmstate = create_foreign_modify(mtstate->ps.state,
+									rte,
+									resultRelInfo,
+									CMD_DELETE,
+									outerPlanState(mtstate)->plan,
+									sql.data,
+									NIL,
+									0,
+									false,
+									NIL);
+
+	if (resultRelInfo->ri_FdwState)
+	{
+		Assert(plan && plan->operation == CMD_UPDATE);
+		Assert(resultRelInfo->ri_usesFdwDirectModify == false);
+		((PgFdwModifyState *) resultRelInfo->ri_FdwState)->aux_delete_fmstate = fmstate;
+	}
+	else
+		resultRelInfo->ri_FdwState = fmstate;
+}
+
+/*
+ * postgresEndForeignDelete
+ *     Tears down state for delete on foreign table
+ */
+static void
+postgresEndForeignDelete(EState *estate,
+						 ResultRelInfo *resultRelInfo)
+{
+	PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState;
+
+	Assert(fmstate != NULL);
+
+	if (fmstate->aux_delete_fmstate)
+		fmstate = fmstate->aux_delete_fmstate;
+
+	/* Destroy the execution state */
+	finish_foreign_modify(fmstate);
+}
+
 /*
  * postgresIsForeignRelUpdatable
  *		Determine whether a foreign table supports INSERT, UPDATE and/or
@@ -2426,8 +2545,11 @@ postgresPlanDirectModify(PlannerInfo *root,
 
 	/*
 	 * The table modification must be an UPDATE or DELETE.
+	 * Partition column update might involve tuple routing,
+	 * which is implemented in ModifyTable, hence direct modification
+	 * should be forbidden in this case.
 	 */
-	if (operation != CMD_UPDATE && operation != CMD_DELETE)
+	if (plan->partColsUpdated || (operation != CMD_UPDATE && operation != CMD_DELETE))
 		return false;
 
 	/*
@@ -4031,6 +4153,7 @@ create_foreign_modify(EState *estate,
 
 	/* Initialize auxiliary state */
 	fmstate->aux_fmstate = NULL;
+	fmstate->aux_delete_fmstate = NULL;
 
 	return fmstate;
 }
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 30b5175da5b..04bd5f2e54c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2177,18 +2177,21 @@ select tableoid::regclass, * FROM utrtest;
 select tableoid::regclass, * FROM remp;
 select tableoid::regclass, * FROM locp;
 
--- It's not allowed to move a row from a partition that is foreign to another
-update utrtest set a = 2 where b = 'foo' returning *;
+-- Moving a row from a foreign partition to local with partition key as qual
+update utrtest set a = 1 where a = 2 returning *;
+
+-- From local to foreign with partition key as qual
+update utrtest set a = 2 where a = 1 and b = 'foo' returning *;
 
--- But the reverse is allowed
-update utrtest set a = 1 where b = 'qux' returning *;
+-- Doesn't work without partition key in qual
+update utrtest set a = 1 where b = 'foo' returning *;
 
 select tableoid::regclass, * FROM utrtest;
 select tableoid::regclass, * FROM remp;
 select tableoid::regclass, * FROM locp;
 
 -- The executor should not let unexercised FDWs shut down
-update utrtest set a = 1 where b = 'foo';
+update utrtest set a = 1 where b = 'qux';
 
 -- Test that remote triggers work with update tuple routing
 create trigger loct_br_insert_trigger before insert on loct
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index b3ce4bae530..e8dabcff507 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1235,6 +1235,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 	resultRelInfo->ri_projectNewInfoValid = false;
 	resultRelInfo->ri_FdwState = NULL;
 	resultRelInfo->ri_usesFdwDirectModify = false;
+	resultRelInfo->ri_usesFdwForeignDelete = false;
 	resultRelInfo->ri_ConstraintExprs = NULL;
 	resultRelInfo->ri_GeneratedExprs = NULL;
 	resultRelInfo->ri_projectReturning = NULL;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index d328856ae5b..b238712d318 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1484,6 +1484,14 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate,
 		MemoryContextSwitchTo(oldcxt);
 	}
 
+	if (!resultRelInfo->ri_usesFdwDirectModify &&
+		resultRelInfo->ri_FdwRoutine != NULL &&
+		resultRelInfo->ri_FdwRoutine->BeginForeignDelete != NULL)
+	{
+		resultRelInfo->ri_usesFdwForeignDelete = true;
+		resultRelInfo->ri_FdwRoutine->BeginForeignDelete(mtstate, resultRelInfo);
+	}
+
 	/*
 	 * Row movement, part 1.  Delete the tuple, but skip RETURNING processing.
 	 * We want to return rows from INSERT.
@@ -1612,6 +1620,7 @@ ExecUpdate(ModifyTableState *mtstate,
 	TM_Result	result;
 	TM_FailureData tmfd;
 	List	   *recheckIndexes = NIL;
+	bool		partition_constraint_failed;
 
 	/*
 	 * abort the operation if not running transactions
@@ -1638,6 +1647,10 @@ ExecUpdate(ModifyTableState *mtstate,
 			return NULL;		/* "do nothing" */
 	}
 
+	ExecMaterializeSlot(slot);
+	partition_constraint_failed = resultRelationDesc->rd_rel->relispartition &&
+		!ExecPartitionCheck(resultRelInfo, slot, estate, false);
+
 	/* INSTEAD OF ROW UPDATE Triggers */
 	if (resultRelInfo->ri_TrigDesc &&
 		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
@@ -1646,7 +1659,14 @@ ExecUpdate(ModifyTableState *mtstate,
 								  oldtuple, slot))
 			return NULL;		/* "do nothing" */
 	}
-	else if (resultRelInfo->ri_FdwRoutine)
+
+	/*
+	 * if no tuple routing from remote source is required or if it's not
+	 * supported by FDW
+	 */
+	else if (resultRelInfo->ri_FdwRoutine &&
+			 (!partition_constraint_failed ||
+			  resultRelInfo->ri_FdwRoutine->BeginForeignDelete == NULL))
 	{
 		/*
 		 * GENERATED expressions might reference the tableoid column, so
@@ -1683,7 +1703,6 @@ ExecUpdate(ModifyTableState *mtstate,
 	else
 	{
 		LockTupleMode lockmode;
-		bool		partition_constraint_failed;
 		bool		update_indexes;
 
 		/*
@@ -3184,6 +3203,12 @@ ExecEndModifyTable(ModifyTableState *node)
 			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
 														   resultRelInfo);
 
+		if (resultRelInfo->ri_usesFdwForeignDelete &&
+			resultRelInfo->ri_FdwRoutine != NULL &&
+			resultRelInfo->ri_FdwRoutine->EndForeignDelete != NULL)
+			resultRelInfo->ri_FdwRoutine->EndForeignDelete(node->ps.state,
+														   resultRelInfo);
+
 		/*
 		 * Cleanup the initialized batch slots. This only matters for FDWs
 		 * with batching, but the other cases will have ri_NumSlotsInitialized
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index a801cd30576..cc498932d8e 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -113,6 +113,12 @@ typedef void (*BeginForeignInsert_function) (ModifyTableState *mtstate,
 typedef void (*EndForeignInsert_function) (EState *estate,
 										   ResultRelInfo *rinfo);
 
+typedef void (*BeginForeignDelete_function) (ModifyTableState *mtstate,
+											 ResultRelInfo *rinfo);
+
+typedef void (*EndForeignDelete_function) (EState *estate,
+										   ResultRelInfo *rinfo);
+
 typedef int (*IsForeignRelUpdatable_function) (Relation rel);
 
 typedef bool (*PlanDirectModify_function) (PlannerInfo *root,
@@ -237,6 +243,8 @@ typedef struct FdwRoutine
 	EndForeignModify_function EndForeignModify;
 	BeginForeignInsert_function BeginForeignInsert;
 	EndForeignInsert_function EndForeignInsert;
+	BeginForeignDelete_function BeginForeignDelete;
+	EndForeignDelete_function EndForeignDelete;
 	IsForeignRelUpdatable_function IsForeignRelUpdatable;
 	PlanDirectModify_function PlanDirectModify;
 	BeginDirectModify_function BeginDirectModify;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 37cb4f3d59a..a858d94b143 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -460,6 +460,8 @@ typedef struct ResultRelInfo
 	/* true when modifying foreign table directly */
 	bool		ri_usesFdwDirectModify;
 
+	bool		ri_usesFdwForeignDelete;
+
 	/* batch insert stuff */
 	int			ri_NumSlots;	/* number of slots in the array */
 	int			ri_NumSlotsInitialized; /* number of initialized slots */
-- 
2.30.1 (Apple Git-130)

