On Sat, 2021-06-05 at 09:47 -0700, Zhihong Yu wrote:
> On Fri, Jun 4, 2021 at 4:14 PM Jacob Champion <pchamp...@vmware.com> wrote:
> > Agreed. I'm going to double-check with Deep that the new calls
> > to table_tuple_fetch_row_version() should be projecting the full row,
> > then post an updated patch some time next week.

(The discussions over the inheritance_planner fallout are still
going, but in the meantime here's an updated v4 that builds
and passes `make check`.)

> +       return 
> relation->rd_tableam->scan_begin_with_column_projection(relation, snapshot, 
> 0, NULL,
> +                                           parallel_scan, flags, proj);
> 
> scan_begin_with_column_projection() adds a parameter to scan_begin().
> Can scan_begin() be enhanced with this projection parameter ?
> Otherwise in the future we may have 
> scan_begin_with_column_projection_with_x_y ...

Maybe; I agree that would match the current "extension" APIs a little
better. I'll let Deep and/or Ashwin chime in on why this design was
chosen.

> +   /* Make sure the the new slot is not dependent on the original tuple */
> 
> Double 'the' in the comment. More than one place with duplicate 'the'
> in the patch.

Fixed.

> +typedef struct neededColumnContext
> +{
> +   Bitmapset **mask;
> +   int n;
> 
> Should field n be named ncol ? 'n' seems too general.

Agreed; changed to ncol.

> +        * TODO: Remove this hack!! This should be done once at the start of 
> the tid scan.
> 
> Would the above be addressed in the next patch ?

I have not had time to get to this in v4, sorry.

> Toward the end of extract_scan_columns():
> 
> +                       bms_free(rte->scanCols);
> +                       rte->scanCols = bms_make_singleton(0);
> +                       break;
> 
> Should 'goto outer;' be in place of 'break;' (since rte->scanCols has
> been assigned for whole-row) ?

Agreed and fixed. Thank you!

--Jacob
From 7ac00847f17e2a0448ae06370598db0f034fcc7c Mon Sep 17 00:00:00 2001
From: Jacob Champion <pchamp...@vmware.com>
Date: Tue, 2 Mar 2021 08:59:45 -0800
Subject: [PATCH v4] tableam: accept column projection list

TODO: check the additional bms_make_singleton(0) calls in
      src/backend/executor/nodeModifyTable.c

Co-authored-by: Soumyadeep Chakraborty <soumyadeep2...@gmail.com>
Co-authored-by: Melanie Plageman <melanieplage...@gmail.com>
Co-authored-by: Ashwin Agrawal <aagra...@pivotal.io>
Co-authored-by: Jacob Champion <pchamp...@vmware.com>
---
 src/backend/access/heap/heapam_handler.c |   5 +-
 src/backend/access/nbtree/nbtsort.c      |   3 +-
 src/backend/access/table/tableam.c       |   5 +-
 src/backend/commands/trigger.c           |  19 +++-
 src/backend/executor/execMain.c          |   3 +-
 src/backend/executor/execPartition.c     |   2 +
 src/backend/executor/execReplication.c   |   6 +-
 src/backend/executor/execScan.c          | 108 +++++++++++++++++++++
 src/backend/executor/nodeIndexscan.c     |  10 ++
 src/backend/executor/nodeLockRows.c      |   7 +-
 src/backend/executor/nodeModifyTable.c   |  47 +++++++--
 src/backend/executor/nodeSeqscan.c       |  43 ++++++++-
 src/backend/executor/nodeTidscan.c       |  11 ++-
 src/backend/nodes/copyfuncs.c            |   2 +
 src/backend/nodes/equalfuncs.c           |   2 +
 src/backend/nodes/outfuncs.c             |   2 +
 src/backend/nodes/readfuncs.c            |   2 +
 src/backend/optimizer/path/allpaths.c    |  85 +++++++++++++++-
 src/backend/optimizer/prep/preptlist.c   |  13 ++-
 src/backend/optimizer/util/inherit.c     |  37 ++++++-
 src/backend/parser/analyze.c             |  40 ++++++--
 src/backend/parser/parse_relation.c      |  18 ++++
 src/backend/partitioning/partbounds.c    |  14 ++-
 src/backend/rewrite/rewriteHandler.c     |   8 ++
 src/include/access/tableam.h             | 118 +++++++++++++++++++++--
 src/include/executor/executor.h          |   6 ++
 src/include/nodes/execnodes.h            |   1 +
 src/include/nodes/parsenodes.h           |  14 +++
 src/include/utils/rel.h                  |  14 +++
 29 files changed, 598 insertions(+), 47 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index e2cd79ec54..e440ae4a69 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -182,7 +182,8 @@ static bool
 heapam_fetch_row_version(Relation relation,
 						 ItemPointer tid,
 						 Snapshot snapshot,
-						 TupleTableSlot *slot)
+						 TupleTableSlot *slot,
+						 Bitmapset *project_cols)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	Buffer		buffer;
@@ -350,7 +351,7 @@ static TM_Result
 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				  LockWaitPolicy wait_policy, uint8 flags,
-				  TM_FailureData *tmfd)
+				  TM_FailureData *tmfd, Bitmapset *project_cols)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 2c4d7f6e25..57462eb9c7 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -1966,7 +1966,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 	indexInfo = BuildIndexInfo(btspool->index);
 	indexInfo->ii_Concurrent = btshared->isconcurrent;
 	scan = table_beginscan_parallel(btspool->heap,
-									ParallelTableScanFromBTShared(btshared));
+									ParallelTableScanFromBTShared(btshared),
+									NULL);
 	reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
 									   true, progress, _bt_build_callback,
 									   (void *) &buildstate, scan);
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 5ea5bdd810..9bda57247a 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -172,7 +172,7 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
 }
 
 TableScanDesc
-table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan, Bitmapset *proj)
 {
 	Snapshot	snapshot;
 	uint32		flags = SO_TYPE_SEQSCAN |
@@ -194,6 +194,9 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
 		snapshot = SnapshotAny;
 	}
 
+	if (proj)
+		return relation->rd_tableam->scan_begin_with_column_projection(relation, snapshot, 0, NULL,
+											parallel_scan, flags, proj);
 	return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
 											parallel_scan, flags);
 }
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 07c73f39de..5dd2a12cb5 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2532,6 +2532,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 		if (newtuple != trigtuple)
 			heap_freetuple(newtuple);
 	}
+
+	/* Make sure the new slot is not dependent on the original tuple */
+	ExecMaterializeSlot(slot);
+
 	if (should_free)
 		heap_freetuple(trigtuple);
 
@@ -2816,6 +2820,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 			newtuple = NULL;
 		}
 	}
+
+	/* Make sure the new slot is not dependent on the original tuple */
+	ExecMaterializeSlot(newslot);
+
 	if (should_free_trig)
 		heap_freetuple(trigtuple);
 
@@ -3023,7 +3031,8 @@ GetTupleForTrigger(EState *estate,
 								estate->es_output_cid,
 								lockmode, LockWaitBlock,
 								lockflags,
-								&tmfd);
+								&tmfd,
+								bms_make_singleton(0));
 
 		switch (test)
 		{
@@ -3098,7 +3107,7 @@ GetTupleForTrigger(EState *estate,
 		 * suffices.
 		 */
 		if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
-										   oldslot))
+										   oldslot, bms_make_singleton(0)))
 			elog(ERROR, "failed to fetch tuple for trigger");
 	}
 
@@ -3962,7 +3971,8 @@ AfterTriggerExecute(EState *estate,
 
 				if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid1),
 												   SnapshotAny,
-												   LocTriggerData.tg_trigslot))
+												   LocTriggerData.tg_trigslot,
+												   bms_make_singleton(0)))
 					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
 				LocTriggerData.tg_trigtuple =
 					ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
@@ -3981,7 +3991,8 @@ AfterTriggerExecute(EState *estate,
 
 				if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid2),
 												   SnapshotAny,
-												   LocTriggerData.tg_newslot))
+												   LocTriggerData.tg_newslot,
+												   bms_make_singleton(0)))
 					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
 				LocTriggerData.tg_newtuple =
 					ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index b3ce4bae53..0197fbd0da 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2575,7 +2575,8 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot)
 			/* ordinary table, fetch the tuple */
 			if (!table_tuple_fetch_row_version(erm->relation,
 											   (ItemPointer) DatumGetPointer(datum),
-											   SnapshotAny, slot))
+											   SnapshotAny, slot,
+											   bms_make_singleton(0)))
 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 			return true;
 		}
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 606c920b06..b82650ef32 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -837,6 +837,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 						ExecInitQual((List *) clause, &mtstate->ps);
 				}
 			}
+
+			PopulateNeededColumnsForOnConflictUpdate(leaf_part_rri);
 		}
 	}
 
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 1e285e0349..3b4a65fa11 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -180,7 +180,8 @@ retry:
 							   lockmode,
 							   LockWaitBlock,
 							   0 /* don't follow updates */ ,
-							   &tmfd);
+							   &tmfd,
+							   bms_make_singleton(0));
 
 		PopActiveSnapshot();
 
@@ -357,7 +358,8 @@ retry:
 							   lockmode,
 							   LockWaitBlock,
 							   0 /* don't follow updates */ ,
-							   &tmfd);
+							   &tmfd,
+							   bms_make_singleton(0));
 
 		PopActiveSnapshot();
 
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 69ab34573e..e25c999b4d 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -20,7 +20,9 @@
 
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
 #include "utils/memutils.h"
+#include "utils/rel.h"
 
 
 
@@ -340,3 +342,109 @@ ExecScanReScan(ScanState *node)
 		}
 	}
 }
+
+typedef struct neededColumnContext
+{
+	Bitmapset **mask;
+	int ncol;
+} neededColumnContext;
+
+static bool
+neededColumnContextWalker(Node *node, neededColumnContext *c)
+{
+	if (node == NULL || contains_whole_row_col(*c->mask))
+		return false;
+
+	if (IsA(node, Var))
+	{
+		Var *var = (Var *)node;
+
+		if (var->varattno > 0)
+		{
+			Assert(var->varattno <= c->ncol);
+			*(c->mask) = bms_add_member(*(c->mask), var->varattno);
+		}
+		else if(var->varattno == 0) {
+			bms_free(*(c->mask));
+			*(c->mask) = bms_make_singleton(0);
+		}
+
+		return false;
+	}
+	return expression_tree_walker(node, neededColumnContextWalker, (void * )c);
+}
+
+/*
+ * ncol specifies the number of allowed entries in mask: we use
+ * it for bounds-checking in the walker above.
+ */
+void
+PopulateNeededColumnsForNode(Node *expr, int ncol, Bitmapset **scanCols)
+{
+	neededColumnContext c;
+
+	c.mask = scanCols;
+	c.ncol = ncol;
+
+	neededColumnContextWalker(expr, &c);
+}
+
+Bitmapset *
+PopulateNeededColumnsForScan(ScanState *scanstate, int ncol)
+{
+	Bitmapset *result = NULL;
+	Plan	   *plan = scanstate->ps.plan;
+
+	PopulateNeededColumnsForNode((Node *) plan->targetlist, ncol, &result);
+	PopulateNeededColumnsForNode((Node *) plan->qual, ncol, &result);
+
+	if (IsA(plan, IndexScan))
+	{
+		PopulateNeededColumnsForNode((Node *) ((IndexScan *) plan)->indexqualorig, ncol, &result);
+		PopulateNeededColumnsForNode((Node *) ((IndexScan *) plan)->indexorderbyorig, ncol, &result);
+	}
+	else if (IsA(plan, BitmapHeapScan))
+		PopulateNeededColumnsForNode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig, ncol, &result);
+
+	return result;
+}
+
+Bitmapset *
+PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol)
+{
+	Bitmapset *epqCols = NULL;
+	Assert(epqstate && epqstate->plan);
+	PopulateNeededColumnsForNode((Node *) epqstate->plan->qual,
+								 ncol,
+								 &epqCols);
+	return epqCols;
+}
+
+void
+PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo)
+{
+	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
+	ProjectionInfo *oc_ProjInfo = resultRelInfo->ri_onConflict->oc_ProjInfo;
+	Relation relation = resultRelInfo->ri_RelationDesc;
+	Bitmapset *proj_cols = NULL;
+	ListCell *lc;
+
+	if (onConflictSetWhere && onConflictSetWhere->expr)
+		PopulateNeededColumnsForNode((Node *) onConflictSetWhere->expr,
+									 RelationGetDescr(relation)->natts,
+									 &proj_cols);
+
+	if (oc_ProjInfo)
+		PopulateNeededColumnsForNode((Node *) oc_ProjInfo->pi_state.expr,
+									 RelationGetDescr(relation)->natts,
+									 &proj_cols);
+
+	foreach(lc, resultRelInfo->ri_WithCheckOptionExprs)
+	{
+		ExprState  *wcoExpr = (ExprState *) lfirst(lc);
+		PopulateNeededColumnsForNode((Node *) wcoExpr->expr,
+									 RelationGetDescr(relation)->natts,
+									 &proj_cols);
+	}
+	resultRelInfo->ri_onConflict->proj_cols = proj_cols;
+}
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 2fffb1b437..4923bb258c 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -115,6 +115,16 @@ IndexNext(IndexScanState *node)
 								   node->iss_NumScanKeys,
 								   node->iss_NumOrderByKeys);
 
+		if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+		{
+			Bitmapset *proj = NULL;
+			Scan *planNode = (Scan *)node->ss.ps.plan;
+			int rti = planNode->scanrelid;
+			RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1);
+			proj = rte->scanCols;
+			table_index_fetch_set_column_projection(scandesc->xs_heapfetch, proj);
+		}
+
 		node->iss_ScanDesc = scandesc;
 
 		/*
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index b2e5c30079..bbff96a85f 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -80,6 +80,7 @@ lnext:
 		int			lockflags = 0;
 		TM_Result	test;
 		TupleTableSlot *markSlot;
+		Bitmapset *epqCols = NULL;
 
 		/* clear any leftover test tuple for this rel */
 		markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
@@ -179,11 +180,15 @@ lnext:
 		if (!IsolationUsesXactSnapshot())
 			lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
 
+		epqCols = PopulateNeededColumnsForEPQ(&node->lr_epqstate,
+											  RelationGetDescr(erm->relation)->natts);
+
 		test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
 								markSlot, estate->es_output_cid,
 								lockmode, erm->waitPolicy,
 								lockflags,
-								&tmfd);
+								&tmfd,
+								epqCols);
 
 		switch (test)
 		{
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 379b056310..c9fec21449 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -244,7 +244,7 @@ ExecCheckTIDVisible(EState *estate,
 	if (!IsolationUsesXactSnapshot())
 		return;
 
-	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
+	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL))
 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
 	ExecCheckTupleVisible(estate, rel, tempSlot);
 	ExecClearTuple(tempSlot);
@@ -1202,6 +1202,7 @@ ldelete:;
 				{
 					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
+					Bitmapset *epqCols = NULL;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
@@ -1216,12 +1217,15 @@ ldelete:;
 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
 												 resultRelInfo->ri_RangeTableIndex);
 
+					epqCols = PopulateNeededColumnsForEPQ(epqstate,
+														  RelationGetDescr(resultRelationDesc)->natts);
+
 					result = table_tuple_lock(resultRelationDesc, tupleid,
 											  estate->es_snapshot,
 											  inputslot, estate->es_output_cid,
 											  LockTupleExclusive, LockWaitBlock,
 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &tmfd);
+											  &tmfd, epqCols);
 
 					switch (result)
 					{
@@ -1374,8 +1378,23 @@ ldelete:;
 			}
 			else
 			{
+				RangeTblEntry *resultrte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex, estate);
+				Bitmapset *project_cols = resultrte->returningCols;
+				/*
+				 * XXX returningCols should never be empty if we have a RETURNING
+				 * clause. Right now, if we have a view, we fail to populate the
+				 * returningCols of its base table's RTE.
+				 * If we encounter such a situation now, for correctness, ensure
+				 * that we fetch all the columns.
+				 */
+				if(bms_is_empty(resultrte->returningCols))
+				{
+					bms_free(resultrte->returningCols);
+					project_cols = bms_make_singleton(0);
+				}
 				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
-												   SnapshotAny, slot))
+												   SnapshotAny, slot,
+												   project_cols))
 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
 			}
 		}
@@ -1523,7 +1542,8 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate,
 			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
 											   tupleid,
 											   SnapshotAny,
-											   oldSlot))
+											   oldSlot,
+											   bms_make_singleton(0) /* TODO */))
 				elog(ERROR, "failed to fetch tuple being updated");
 			*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
 												oldSlot);
@@ -1822,6 +1842,7 @@ lreplace:;
 					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
 					TupleTableSlot *oldSlot;
+					Bitmapset *epqCols = NULL;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
@@ -1835,12 +1856,14 @@ lreplace:;
 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
 												 resultRelInfo->ri_RangeTableIndex);
 
+					epqCols = PopulateNeededColumnsForEPQ(epqstate,
+														  RelationGetDescr(resultRelationDesc)->natts);
 					result = table_tuple_lock(resultRelationDesc, tupleid,
 											  estate->es_snapshot,
 											  inputslot, estate->es_output_cid,
 											  lockmode, LockWaitBlock,
 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &tmfd);
+											  &tmfd, epqCols);
 
 					switch (result)
 					{
@@ -1864,7 +1887,8 @@ lreplace:;
 							if (!table_tuple_fetch_row_version(resultRelationDesc,
 															   tupleid,
 															   SnapshotAny,
-															   oldSlot))
+															   oldSlot,
+															   bms_make_singleton(0) /* TODO */))
 								elog(ERROR, "failed to fetch tuple being updated");
 							slot = ExecGetUpdateNewTuple(resultRelInfo,
 														 epqslot, oldSlot);
@@ -1981,6 +2005,8 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	Relation	relation = resultRelInfo->ri_RelationDesc;
 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
+	ProjectionInfo *oc_ProjInfo = resultRelInfo->ri_onConflict->oc_ProjInfo;
+	Bitmapset *proj_cols = resultRelInfo->ri_onConflict->proj_cols;
 	TM_FailureData tmfd;
 	LockTupleMode lockmode;
 	TM_Result	test;
@@ -2001,7 +2027,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 							estate->es_snapshot,
 							existing, estate->es_output_cid,
 							lockmode, LockWaitBlock, 0,
-							&tmfd);
+							&tmfd, proj_cols);
 	switch (test)
 	{
 		case TM_Ok:
@@ -2149,7 +2175,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	}
 
 	/* Project the new tuple version */
-	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
+	ExecProject(oc_ProjInfo);
 
 	/*
 	 * Note that it is possible that the target tuple has been modified in
@@ -2571,7 +2597,8 @@ ExecModifyTable(PlanState *pstate)
 					Assert(tupleid != NULL);
 					if (!table_tuple_fetch_row_version(relation, tupleid,
 													   SnapshotAny,
-													   oldSlot))
+													   oldSlot,
+													   bms_make_singleton(0) /* TODO */))
 						elog(ERROR, "failed to fetch tuple being updated");
 				}
 				slot = ExecGetUpdateNewTuple(resultRelInfo, planSlot,
@@ -3033,6 +3060,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 									&mtstate->ps);
 			onconfl->oc_WhereClause = qualexpr;
 		}
+
+		PopulateNeededColumnsForOnConflictUpdate(resultRelInfo);
 	}
 
 	/*
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 066f9ae37e..2535d1c7f4 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -31,6 +31,7 @@
 #include "access/tableam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSeqscan.h"
+#include "nodes/nodeFuncs.h"
 #include "utils/rel.h"
 
 static TupleTableSlot *SeqNext(SeqScanState *node);
@@ -68,9 +69,22 @@ SeqNext(SeqScanState *node)
 		 * We reach here if the scan is not parallel, or if we're serially
 		 * executing a scan that was planned to be parallel.
 		 */
-		scandesc = table_beginscan(node->ss.ss_currentRelation,
-								   estate->es_snapshot,
-								   0, NULL);
+		if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+		{
+			Scan *planNode = (Scan *)node->ss.ps.plan;
+			int rti = planNode->scanrelid;
+			RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1);
+			scandesc = table_beginscan_with_column_projection(node->ss.ss_currentRelation,
+															  estate->es_snapshot,
+															  0, NULL,
+															  rte->scanCols);
+		}
+		else
+		{
+			scandesc = table_beginscan(node->ss.ss_currentRelation,
+									   estate->es_snapshot,
+									   0, NULL);
+		}
 		node->ss.ss_currentScanDesc = scandesc;
 	}
 
@@ -270,14 +284,22 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
 {
 	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+	Bitmapset *proj = NULL;
 
 	pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
+
+	if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+	{
+		proj = PopulateNeededColumnsForScan(&node->ss,
+											node->ss.ss_currentRelation->rd_att->natts);
+	}
+
 	table_parallelscan_initialize(node->ss.ss_currentRelation,
 								  pscan,
 								  estate->es_snapshot);
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj);
 }
 
 /* ----------------------------------------------------------------
@@ -307,8 +329,19 @@ ExecSeqScanInitializeWorker(SeqScanState *node,
 							ParallelWorkerContext *pwcxt)
 {
 	ParallelTableScanDesc pscan;
+	Bitmapset *proj = NULL;
+
+	/*
+	 * FIXME: this duplicates the work done in ExecSeqScanInitializeDSM. In future
+	 * the plan will carry the projection list, and this overhead will not exist.
+	 */
+	if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+	{
+		proj = PopulateNeededColumnsForScan(&node->ss,
+											node->ss.ss_currentRelation->rd_att->natts);
+	}
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj);
 }
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 48c3737da2..ffb00c5f03 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -366,6 +366,7 @@ TidNext(TidScanState *node)
 	while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
 	{
 		ItemPointerData tid = tidList[node->tss_TidPtr];
+		Bitmapset *project_cols = NULL;
 
 		/*
 		 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
@@ -375,7 +376,15 @@ TidNext(TidScanState *node)
 		if (node->tss_isCurrentOf)
 			table_tuple_get_latest_tid(scan, &tid);
 
-		if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
+		/*
+		 * TODO: Remove this hack!! This should be done once at the start of the tid scan.
+		 * Ideally we should probably set the list of projection cols in the
+		 * generic scan desc, perhaps in TableScanDesc.
+		 */
+		project_cols = PopulateNeededColumnsForScan((ScanState *) node,
+													 RelationGetDescr(heapRelation)->natts);
+
+		if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot, project_cols))
 			return slot;
 
 		/* Bad TID or failed snapshot qual; try next */
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 90770a89b0..7dfdce02bf 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2495,8 +2495,10 @@ _copyRangeTblEntry(const RangeTblEntry *from)
 	COPY_SCALAR_FIELD(checkAsUser);
 	COPY_BITMAPSET_FIELD(selectedCols);
 	COPY_BITMAPSET_FIELD(insertedCols);
+	COPY_BITMAPSET_FIELD(returningCols);
 	COPY_BITMAPSET_FIELD(updatedCols);
 	COPY_BITMAPSET_FIELD(extraUpdatedCols);
+	COPY_BITMAPSET_FIELD(scanCols);
 	COPY_NODE_FIELD(securityQuals);
 
 	return newnode;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index ce76d093dd..dedfbd9a9b 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2750,8 +2750,10 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b)
 	COMPARE_SCALAR_FIELD(checkAsUser);
 	COMPARE_BITMAPSET_FIELD(selectedCols);
 	COMPARE_BITMAPSET_FIELD(insertedCols);
+	COMPARE_BITMAPSET_FIELD(returningCols);
 	COMPARE_BITMAPSET_FIELD(updatedCols);
 	COMPARE_BITMAPSET_FIELD(extraUpdatedCols);
+	COMPARE_BITMAPSET_FIELD(scanCols);
 	COMPARE_NODE_FIELD(securityQuals);
 
 	return true;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 8da8b14f0e..2928f6e5d0 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -3296,8 +3296,10 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
 	WRITE_OID_FIELD(checkAsUser);
 	WRITE_BITMAPSET_FIELD(selectedCols);
 	WRITE_BITMAPSET_FIELD(insertedCols);
+	WRITE_BITMAPSET_FIELD(returningCols);
 	WRITE_BITMAPSET_FIELD(updatedCols);
 	WRITE_BITMAPSET_FIELD(extraUpdatedCols);
+	WRITE_BITMAPSET_FIELD(scanCols);
 	WRITE_NODE_FIELD(securityQuals);
 }
 
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 3772ea07df..e7b8625713 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1508,8 +1508,10 @@ _readRangeTblEntry(void)
 	READ_OID_FIELD(checkAsUser);
 	READ_BITMAPSET_FIELD(selectedCols);
 	READ_BITMAPSET_FIELD(insertedCols);
+	READ_BITMAPSET_FIELD(returningCols);
 	READ_BITMAPSET_FIELD(updatedCols);
 	READ_BITMAPSET_FIELD(extraUpdatedCols);
+	READ_BITMAPSET_FIELD(scanCols);
 	READ_NODE_FIELD(securityQuals);
 
 	READ_DONE();
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 353454b183..caf113fda8 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -27,6 +27,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "utils/rel.h"
 #ifdef OPTIMIZER_DEBUG
 #include "nodes/print.h"
 #endif
@@ -142,7 +143,7 @@ static void subquery_push_qual(Query *subquery,
 static void recurse_push_qual(Node *setOp, Query *topquery,
 							  RangeTblEntry *rte, Index rti, Node *qual);
 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
-
+static void extract_scan_columns(PlannerInfo *root);
 
 /*
  * make_one_rel
@@ -185,6 +186,8 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	 */
 	set_base_rel_sizes(root);
 
+	extract_scan_columns(root);
+
 	/*
 	 * We should now have size estimates for every actual table involved in
 	 * the query, and we also know which if any have been deleted from the
@@ -235,6 +238,86 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	return rel;
 }
 
+static void
+extract_scan_columns(PlannerInfo *root)
+{
+	for (int i = 1; i < root->simple_rel_array_size; i++)
+	{
+		ListCell *lc;
+		RangeTblEntry *rte = root->simple_rte_array[i];
+		RelOptInfo    *rel = root->simple_rel_array[i];
+		if (rte == NULL)
+			continue;
+		if (rel == NULL)
+			continue;
+		if (IS_DUMMY_REL(rel))
+			continue;
+		rte->scanCols = NULL;
+		foreach(lc, rel->reltarget->exprs)
+		{
+			Node *node;
+			List *vars;
+			ListCell *lc1;
+			node = lfirst(lc);
+			/*
+			 * TODO: suggest a default for vars_only to make maintenance less burdensome
+			 */
+			vars = pull_var_clause(node,
+								   PVC_RECURSE_AGGREGATES |
+									   PVC_RECURSE_WINDOWFUNCS |
+									   PVC_RECURSE_PLACEHOLDERS);
+			foreach(lc1, vars)
+			{
+				Var *var = lfirst(lc1);
+				if (var->varno == i)
+				{
+					if (var->varattno > 0)
+						rte->scanCols = bms_add_member(rte->scanCols, var->varattno);
+					else if (var->varattno == 0)
+					{
+						/*
+						 * If there is a whole-row var, we have to fetch the whole row.
+						 */
+						bms_free(rte->scanCols);
+						rte->scanCols = bms_make_singleton(0);
+						goto outer;
+					}
+				}
+			}
+		}
+		foreach(lc, rel->baserestrictinfo)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+			List *vars = pull_var_clause((Node *)rinfo->clause,
+										 PVC_RECURSE_AGGREGATES |
+											 PVC_RECURSE_WINDOWFUNCS |
+											 PVC_RECURSE_PLACEHOLDERS);
+			ListCell *lc1;
+			if (contains_whole_row_col(rte->scanCols))
+				break;
+			foreach(lc1, vars)
+			{
+				Var *var = lfirst(lc1);
+				if (var->varno == i)
+				{
+					if (var->varattno > 0)
+						rte->scanCols = bms_add_member(rte->scanCols, var->varattno);
+					else if (var->varattno == 0)
+					{
+						/*
+						 * If there is a whole-row var, we have to fetch the whole row.
+						 */
+						bms_free(rte->scanCols);
+						rte->scanCols = bms_make_singleton(0);
+						goto outer;
+					}
+				}
+			}
+		}
+		outer:;
+	}
+}
+
 /*
  * set_base_rel_consider_startup
  *	  Set the consider_[param_]startup flags for each base-relation entry.
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index e9434580d6..ca1490a951 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -205,8 +205,19 @@ preprocess_targetlist(PlannerInfo *root)
 	 * to make these Vars available for the RETURNING calculation.  Vars that
 	 * belong to the result rel don't need to be added, because they will be
 	 * made to refer to the actual heap tuple.
+	 *
+	 * XXX: Avoid adding cols from the returningList to avoid overestimation
+	 * of scanCols from RelOptInfo->reltarget exprs. This is done to avoid
+	 * additional cols from the RETURNING clause making its way into scanCols
+	 * for queries such as:
+	 * delete from base_tbl using other_tbl t where base_tbl.col1 = t.col1 returning *;
+	 * where base_tbl is the root table of an inheritance hierarchy
+	 * TODO: Delete the result_relation guard below if and when
+	 * inheritance_planner() is refactored to not fake a round of planning
+	 * pretending we have a SELECT query (which causes result_relation to be 0
+	 * in the first place)
 	 */
-	if (parse->returningList && list_length(parse->rtable) > 1)
+	if (result_relation && parse->returningList && list_length(parse->rtable) > 1)
 	{
 		List	   *vars;
 		ListCell   *l;
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index 992ef87b9d..eea59f758a 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -50,7 +50,9 @@ static Bitmapset *translate_col_privs(const Bitmapset *parent_privs,
 static void expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel,
 									  RangeTblEntry *rte, Index rti);
 
-
+static Bitmapset *
+translate_parent_cols(Bitmapset *parent_cols, List *translated_vars,
+					  Relation parent_rel);
 /*
  * expand_inherited_rtentry
  *		Expand a rangetable entry that has the "inh" bit set.
@@ -539,6 +541,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	childrte->alias = childrte->eref = makeAlias(parentrte->eref->aliasname,
 												 child_colnames);
 
+	if (childOID != parentOID)
+		childrte->returningCols =
+			translate_parent_cols(parentrte->returningCols,
+								  appinfo->translated_vars, parentrel);
+	else
+		childrte->returningCols = bms_copy(parentrte->returningCols);
+
 	/*
 	 * Translate the column permissions bitmaps to the child's attnums (we
 	 * have to build the translated_vars list before we can do this).  But if
@@ -648,6 +657,32 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	}
 }
 
+/*
+ * translate_parent_cols
+ *	  Translate a bitmapset of parent attnos into the child's attnos via
+ *	  translated_vars.  A whole-row reference (attno 0) is first expanded to
+ *	  attnos 1..natts of parent_rel; NULL translated_vars entries are skipped.
+ */
+static Bitmapset *
+translate_parent_cols(Bitmapset *parent_cols, List *translated_vars,
+					  Relation parent_rel)
+{
+	Bitmapset  *expanded = NULL;	/* temporary set, freed before return */
+	Bitmapset  *result = NULL;
+	int			col = -1;
+
+	if (contains_whole_row_col(parent_cols))
+		parent_cols = expanded = get_ordinal_attnos(parent_rel);
+	while ((col = bms_next_member(parent_cols, col)) >= 0)
+	{
+		Var		   *var = (Var *) list_nth(translated_vars, col - 1);
+		if (var)
+			result = bms_add_member(result, var->varattno);
+	}
+	bms_free(expanded);
+	return result;
+}
+
 /*
  * translate_col_privs
  *	  Translate a bitmapset representing per-column privileges from the
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 9cede29d6a..4d045e44ae 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -73,7 +73,7 @@ static void determineRecursiveColTypes(ParseState *pstate,
 									   Node *larg, List *nrtargetlist);
 static Query *transformReturnStmt(ParseState *pstate, ReturnStmt *stmt);
 static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt);
-static List *transformReturningList(ParseState *pstate, List *returningList);
+static void transformReturningList(ParseState *pstate, Query *qry, List *returningList);
 static List *transformUpdateTargetList(ParseState *pstate,
 									   List *targetList);
 static Query *transformPLAssignStmt(ParseState *pstate,
@@ -469,7 +469,7 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
 	qual = transformWhereClause(pstate, stmt->whereClause,
 								EXPR_KIND_WHERE, "WHERE");
 
-	qry->returningList = transformReturningList(pstate, stmt->returningList);
+	transformReturningList(pstate, qry, stmt->returningList);
 
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
@@ -888,8 +888,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 
 	/* Process RETURNING, if any. */
 	if (stmt->returningList)
-		qry->returningList = transformReturningList(pstate,
-													stmt->returningList);
+		transformReturningList(pstate, qry, stmt->returningList);
 
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
@@ -2341,7 +2340,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
 	qual = transformWhereClause(pstate, stmt->whereClause,
 								EXPR_KIND_WHERE, "WHERE");
 
-	qry->returningList = transformReturningList(pstate, stmt->returningList);
+	transformReturningList(pstate, qry, stmt->returningList);
 
 	/*
 	 * Now we are done with SELECT-like processing, and can get on with
@@ -2436,14 +2435,16 @@ transformUpdateTargetList(ParseState *pstate, List *origTlist)
  * transformReturningList -
  *	handle a RETURNING clause in INSERT/UPDATE/DELETE
  */
-static List *
-transformReturningList(ParseState *pstate, List *returningList)
+static void
+transformReturningList(ParseState *pstate, Query *qry, List *returningList)
 {
 	List	   *rlist;
 	int			save_next_resno;
+	List 	   *vars;
+	ListCell   *l;
 
 	if (returningList == NIL)
-		return NIL;				/* nothing to do */
+		return;
 
 	/*
 	 * We need to assign resnos starting at one in the RETURNING list. Save
@@ -2456,6 +2457,27 @@ transformReturningList(ParseState *pstate, List *returningList)
 	/* transform RETURNING identically to a SELECT targetlist */
 	rlist = transformTargetList(pstate, returningList, EXPR_KIND_RETURNING);
 
+	vars = pull_var_clause((Node *) rlist,
+						   PVC_RECURSE_AGGREGATES |
+							   PVC_RECURSE_WINDOWFUNCS |
+							   PVC_INCLUDE_PLACEHOLDERS);
+	foreach (l, vars)
+	{
+		Var	*var = (Var *) lfirst(l);
+		RangeTblEntry *rte = (RangeTblEntry *) list_nth(pstate->p_rtable, var->varno - 1);
+		if (var->varattno > 0)
+			rte->returningCols = bms_add_member(rte->returningCols, var->varattno);
+		else if (var->varattno == 0)
+		{
+			/*
+			 * If there is a whole-row var, we have to fetch the whole row.
+			 */
+			bms_free(rte->returningCols);
+			rte->returningCols = bms_make_singleton(0);
+			break;
+		}
+	}
+
 	/*
 	 * Complain if the nonempty tlist expanded to nothing (which is possible
 	 * if it contains only a star-expansion of a zero-column table).  If we
@@ -2479,7 +2501,7 @@ transformReturningList(ParseState *pstate, List *returningList)
 	/* restore state */
 	pstate->p_next_resno = save_next_resno;
 
-	return rlist;
+	qry->returningList = rlist;
 }
 
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 7465919044..68723562ee 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1456,7 +1456,9 @@ addRangeTableEntry(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -1543,7 +1545,9 @@ addRangeTableEntryForRelation(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;	/* not set-uid by default, either */
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
+	rte->scanCols = NULL;
 	rte->extraUpdatedCols = NULL;
 
 	/*
@@ -1640,8 +1644,10 @@ addRangeTableEntryForSubquery(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -1946,8 +1952,10 @@ addRangeTableEntryForFunction(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2017,8 +2025,10 @@ addRangeTableEntryForTableFunc(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2104,8 +2114,10 @@ addRangeTableEntryForValues(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2195,8 +2207,10 @@ addRangeTableEntryForJoin(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2345,8 +2359,10 @@ addRangeTableEntryForCTE(ParseState *pstate,
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2470,6 +2486,8 @@ addRangeTableEntryForENR(ParseState *pstate,
 	rte->requiredPerms = 0;
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
+	rte->returningCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 7925fcce3b..a379458c46 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -3200,6 +3200,7 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 		TableScanDesc scan;
 		MemoryContext oldCxt;
 		TupleTableSlot *tupslot;
+		Bitmapset *proj = NULL;
 
 		/* Lock already taken above. */
 		if (part_relid != RelationGetRelid(default_rel))
@@ -3264,7 +3265,15 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 		econtext = GetPerTupleExprContext(estate);
 		snapshot = RegisterSnapshot(GetLatestSnapshot());
 		tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
-		scan = table_beginscan(part_rel, snapshot, 0, NULL);
+		if (table_scans_leverage_column_projection(part_rel))
+		{
+			PopulateNeededColumnsForNode((Node*)partqualstate->expr, tupslot->tts_tupleDescriptor->natts, &proj);
+			scan = table_beginscan_with_column_projection(part_rel, snapshot, 0, NULL, proj);
+		}
+		else
+		{
+			scan = table_beginscan(part_rel, snapshot, 0, NULL);
+		}
 
 		/*
 		 * Switch to per-tuple memory context and reset it for each tuple
@@ -3295,6 +3304,9 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 
 		if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
 			table_close(part_rel, NoLock);	/* keep the lock until commit */
+
+		if (proj)
+			pfree(proj);
 	}
 }
 
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 88a9e95e33..0e59f37e4b 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -1731,8 +1731,10 @@ ApplyRetrieveRule(Query *parsetree,
 			rte->checkAsUser = InvalidOid;
 			rte->selectedCols = NULL;
 			rte->insertedCols = NULL;
+			rte->returningCols = NULL;
 			rte->updatedCols = NULL;
 			rte->extraUpdatedCols = NULL;
+			rte->scanCols = NULL;
 
 			/*
 			 * For the most part, Vars referencing the view should remain as
@@ -1832,15 +1834,19 @@ ApplyRetrieveRule(Query *parsetree,
 	subrte->checkAsUser = rte->checkAsUser;
 	subrte->selectedCols = rte->selectedCols;
 	subrte->insertedCols = rte->insertedCols;
+	subrte->returningCols = rte->returningCols;
 	subrte->updatedCols = rte->updatedCols;
 	subrte->extraUpdatedCols = rte->extraUpdatedCols;
+	subrte->scanCols = rte->scanCols;
 
 	rte->requiredPerms = 0;		/* no permission check on subquery itself */
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
+	rte->returningCols = NULL;
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	return parsetree;
 }
@@ -3191,6 +3197,7 @@ rewriteTargetView(Query *parsetree, Relation view)
 	 * base_rte instead of copying it.
 	 */
 	new_rte = base_rte;
+	new_rte->returningCols = bms_copy(view_rte->returningCols);
 	new_rte->rellockmode = RowExclusiveLock;
 
 	parsetree->rtable = lappend(parsetree->rtable, new_rte);
@@ -3543,6 +3550,7 @@ rewriteTargetView(Query *parsetree, Relation view)
 		}
 	}
 
+	new_rte->returningCols = bms_copy(view_rte->returningCols);
 	table_close(base_rel, NoLock);
 
 	return parsetree;
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 9f1e4a1ac9..f8a86b5bd7 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -265,6 +265,7 @@ typedef struct TableAmRoutine
 {
 	/* this must be set to T_TableAmRoutine */
 	NodeTag		type;
+	bool scans_leverage_column_projection;
 
 
 	/* ------------------------------------------------------------------------
@@ -305,6 +306,30 @@ typedef struct TableAmRoutine
 								 ParallelTableScanDesc pscan,
 								 uint32 flags);
 
+	/*
+	 * Variant of scan_begin() with a column projection bitmap that lists the
+	 * ordinal attribute numbers to be fetched during the scan.
+	 *
+	 * If project_columns is an empty bitmap, none of the data columns are to be
+	 * fetched.
+	 *
+	 * If project_columns is a singleton bitmap with a whole-row reference (0),
+	 * all of the data columns are to be fetched.
+	 *
+	 * Please note: project_columns only deals with non system columns (attnum >= 0)
+	 *
+	 * Please note: Due to the limitations of the slot_get***() APIs, the
+	 * scan_getnextslot() tableAM call must return a TupleTableSlot that is densely
+	 * populated (missing cols indicated with isnull = true up to the largest
+	 * attno in the projection list).
+	 */
+	TableScanDesc (*scan_begin_with_column_projection)(Relation relation,
+													   Snapshot snapshot,
+													   int nkeys, struct ScanKeyData *key,
+													   ParallelTableScanDesc parallel_scan,
+													   uint32 flags,
+													   Bitmapset *project_columns);
+
 	/*
 	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
 	 * TableScanDesc.rs_snapshot needs to be unregistered.
@@ -408,6 +433,26 @@ typedef struct TableAmRoutine
 	 */
 	void		(*index_fetch_end) (struct IndexFetchTableData *data);
 
+	/*
+	 * Set up a column projection list that can be used by index_fetch_tuple()
+	 * to fetch a subset of columns for a tuple.
+	 *
+	 * If project_columns is an empty bitmap, none of the data columns are to be
+	 * fetched.
+	 *
+	 * If project_columns is a singleton bitmap with a whole-row reference (0),
+	 * all of the data columns are to be fetched.
+	 *
+	 * Please note: project_columns only deals with non system columns (attnum >= 0)
+	 *
+	 * Please note: Due to the limitations of the slot_get***() APIs,
+	 * index_fetch_tuple() must return a TupleTableSlot that is densely
+	 * populated (missing cols indicated with isnull = true up to the largest
+	 * attno in the projection list).
+	 */
+	void (*index_fetch_set_column_projection) (struct IndexFetchTableData *data,
+											   Bitmapset *project_columns);
+
 	/*
 	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
 	 * according to `snapshot`. If a tuple was found and passed the visibility
@@ -444,11 +489,27 @@ typedef struct TableAmRoutine
 	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
 	 * according to `snapshot`. If a tuple was found and passed the visibility
 	 * test, returns true, false otherwise.
+	 *
+	 * project_cols is a set of columns to be fetched for the given row.
+	 *
+	 * If project_cols is an empty bitmap, none of the data columns are to be
+	 * fetched.
+	 *
+	 * If project_cols is a singleton bitmap with a whole-row reference (0),
+	 * all of the data columns are to be fetched.
+	 *
+	 * Please note: project_cols only deals with non system columns (attnum >= 0)
+	 *
+	 * Please note: Due to the limitations of the slot_get***() APIs,
+	 * tuple_fetch_row_version() must return a TupleTableSlot that is densely
+	 * populated (missing cols indicated with isnull = true up to the largest
+	 * attno in the projection list).
 	 */
 	bool		(*tuple_fetch_row_version) (Relation rel,
 											ItemPointer tid,
 											Snapshot snapshot,
-											TupleTableSlot *slot);
+											TupleTableSlot *slot,
+											Bitmapset *project_cols);
 
 	/*
 	 * Is tid valid for a scan of this relation.
@@ -535,7 +596,8 @@ typedef struct TableAmRoutine
 							   LockTupleMode mode,
 							   LockWaitPolicy wait_policy,
 							   uint8 flags,
-							   TM_FailureData *tmfd);
+							   TM_FailureData *tmfd,
+							   Bitmapset *project_cols);
 
 	/*
 	 * Perform operations necessary to complete insertions made via
@@ -889,6 +951,12 @@ table_beginscan(Relation rel, Snapshot snapshot,
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
+static inline bool			/* per-AM flag gating use of the projection scan API */
+table_scans_leverage_column_projection(Relation relation)
+{
+	return relation->rd_tableam->scans_leverage_column_projection;
+}
+
 /*
  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
  * snapshot appropriate for scanning catalog relations.
@@ -918,6 +986,19 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
+static inline TableScanDesc	/* begin a scan, handing project_column to the AM */
+table_beginscan_with_column_projection(Relation relation, Snapshot snapshot,
+									   int nkeys, struct ScanKeyData *key,
+									   Bitmapset *project_column)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	Assert(relation->rd_tableam->scans_leverage_column_projection);	/* AM flag must be set */
+	return relation->rd_tableam->scan_begin_with_column_projection(
+		relation, snapshot, nkeys, key, NULL, flags, project_column);	/* NULL: no parallel scan state */
+}
+
 /*
  * table_beginscan_bm is an alternative entry point for setting up a
  * TableScanDesc for a bitmap heap scan.  Although that scan technology is
@@ -1132,7 +1213,8 @@ extern void table_parallelscan_initialize(Relation rel,
  * Caller must hold a suitable lock on the relation.
  */
 extern TableScanDesc table_beginscan_parallel(Relation rel,
-											  ParallelTableScanDesc pscan);
+											  ParallelTableScanDesc pscan,
+											  Bitmapset *proj);
 
 /*
  * Restart a parallel scan.  Call this in the leader process.  Caller is
@@ -1182,6 +1264,13 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
 	scan->rel->rd_tableam->index_fetch_end(scan);
 }
 
+static inline void			/* forwards project_column to the AM's index-fetch callback */
+table_index_fetch_set_column_projection(struct IndexFetchTableData *scan,
+										Bitmapset *project_column)
+{
+	scan->rel->rd_tableam->index_fetch_set_column_projection(scan, project_column);
+}
+
 /*
  * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
  * a visibility test according to `snapshot`. If a tuple was found and passed
@@ -1257,7 +1346,8 @@ static inline bool
 table_tuple_fetch_row_version(Relation rel,
 							  ItemPointer tid,
 							  Snapshot snapshot,
-							  TupleTableSlot *slot)
+							  TupleTableSlot *slot,
+							  Bitmapset *project_cols)
 {
 	/*
 	 * We don't expect direct calls to table_tuple_fetch_row_version with
@@ -1267,7 +1357,7 @@ table_tuple_fetch_row_version(Relation rel,
 	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
 		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
 
-	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
+	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot, project_cols);
 }
 
 /*
@@ -1528,6 +1618,20 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
  *		also lock descendant tuples if lock modes don't conflict.
  *		If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
  *		latest version.
+ *	project_cols: It is a set of columns to be fetched for the tuple being locked.
+ *
+ *	 If project_cols is an empty bitmap, none of the data columns are to be
+ *	 fetched.
+ *
+ *	 If project_cols is a singleton bitmap with a whole-row reference (0),
+ *	 all of the data columns are to be fetched.
+ *
+ *	 Please note: project_cols only deals with non system columns (attnum >= 0)
+ *
+ *	 Please note: Due to the limitations of the slot_get***() APIs,
+ *	 tuple_lock() must return a TupleTableSlot that is densely
+ *	 populated (missing cols indicated with isnull = true up to the largest
+ *	 attno in the projection list).
  *
  * Output parameters:
  *	*slot: contains the target tuple
@@ -1549,11 +1653,11 @@ static inline TM_Result
 table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
 				 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				 LockWaitPolicy wait_policy, uint8 flags,
-				 TM_FailureData *tmfd)
+				 TM_FailureData *tmfd, Bitmapset *project_cols)
 {
 	return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
 									   cid, mode, wait_policy,
-									   flags, tmfd);
+									   flags, tmfd, project_cols);
 }
 
 /*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 3dc03c913e..6824ee79ce 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -646,6 +646,12 @@ extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd);
 
 extern void CheckSubscriptionRelkind(char relkind, const char *nspname,
 									 const char *relname);
+extern void
+PopulateNeededColumnsForNode(Node *expr, int ncol, Bitmapset **scanCols);
+extern Bitmapset *
+PopulateNeededColumnsForScan(ScanState *scanstate, int ncol);
+extern Bitmapset *PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol);
+extern void PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo);
 
 /*
  * prototypes from functions in nodeModifyTable.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 7795a69490..94353b0060 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -381,6 +381,7 @@ typedef struct OnConflictSetState
 	TupleTableSlot *oc_ProjSlot;	/* CONFLICT ... SET ... projection target */
 	ProjectionInfo *oc_ProjInfo;	/* for ON CONFLICT DO UPDATE SET */
 	ExprState  *oc_WhereClause; /* state for the WHERE clause */
+	Bitmapset *proj_cols; /* cols to be scanned during the operation */
 } OnConflictSetState;
 
 /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index ef73342019..5891e31c53 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1149,6 +1149,20 @@ typedef struct RangeTblEntry
 	Bitmapset  *updatedCols;	/* columns needing UPDATE permission */
 	Bitmapset  *extraUpdatedCols;	/* generated columns being updated */
 	List	   *securityQuals;	/* security barrier quals to apply, if any */
+
+	/*
+	 * scanCols: Columns to be retrieved during a physical scan.
+	 * returningCols: Columns to be retrieved to satisfy the RETURNING clause.
+	 *
+	 * Please note: These bitmaps only deal with non-system columns (attnum >= 0)
+	 *
+	 * These bitmaps have some special values:
+	 * - A singleton bitmap with the element 0 indicates that all non-system
+	 *   columns must be fetched.
+	 * - An empty bitmap indicates that no non-system column must be fetched.
+	 */
+	Bitmapset  *scanCols;		/* columns to be fetched during a physical scan */
+	Bitmapset  *returningCols;	/* columns in the RETURNING clause */
 } RangeTblEntry;
 
 /*
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 774ac5b2b1..66d1bea175 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -658,6 +658,20 @@ typedef struct ViewOptions
 	 (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&	\
 	 !IsCatalogRelation(relation))
 
+static inline bool
+contains_whole_row_col(Bitmapset *cols)
+{
+	return bms_is_member(0, cols);	/* member 0 is the whole-row marker */
+}
+
+static inline Bitmapset *
+get_ordinal_attnos(Relation rel)
+{
+	int			natts = RelationGetDescr(rel)->natts;
+
+	return bms_add_range(NULL, 1, natts);	/* new set {1 .. natts} */
+}
+
 /* routines in utils/cache/relcache.c */
 extern void RelationIncrementReferenceCount(Relation rel);
 extern void RelationDecrementReferenceCount(Relation rel);
-- 
2.25.1

Reply via email to