From 55ac97d7425f47cdf4415d3bdc521b58901fc87d Mon Sep 17 00:00:00 2001
From: Nikita Malakhov <n.malakhov@postgrespro.ru>
Date: Fri, 2 Sep 2022 14:06:34 +0300
Subject: [PATCH] Table AM modifications to accept column projection lists

This patch introduces a set of changes to the table AM APIs, making them
accept a column projection list. That helps columnar table AMs, so that
they don't need to fetch all columns from disk, but only the ones
actually needed.

The set of changes in this patch is not exhaustive -
there are many more opportunities that are discussed in the TODO section
below. Before digging deeper, we want to elicit early feedback on the
API changes and the column extraction logic.

TableAM APIs that have been modified are:

1. Sequential scan APIs
2. Index scan APIs
3. API to lock and return a row
4. API to fetch a single row

We have seen performance benefits in Zedstore for many of the optimized
operations [0]. This patch is extracted from the larger patch shared in
[0].

Author: Jacob Champion <pchampion@vmware.com>
Author: Soumyadeep Chakraborty <soumyadeep2007@gmail.com>

Discussion: https://www.postgresql.org/message-id/flat/CAE-ML+9RmTNzKCNTZPQf8O3b-UjHWGFbSoXpQa3Wvuc8YBbEQw@mail.gmail.com
---
 src/backend/access/heap/heapam_handler.c |   5 +-
 src/backend/access/nbtree/nbtsort.c      |   2 +-
 src/backend/access/table/tableam.c       |   6 +-
 src/backend/commands/trigger.c           |  32 ++++--
 src/backend/executor/execMain.c          |   2 +-
 src/backend/executor/execPartition.c     |   2 +
 src/backend/executor/execReplication.c   |   6 +-
 src/backend/executor/execScan.c          | 110 ++++++++++++++++++++-
 src/backend/executor/nodeIndexscan.c     |  10 ++
 src/backend/executor/nodeLockRows.c      |   7 +-
 src/backend/executor/nodeModifyTable.c   |  57 ++++++++---
 src/backend/executor/nodeSeqscan.c       |  59 +++++++++---
 src/backend/executor/nodeTidscan.c       |  12 ++-
 src/backend/optimizer/path/allpaths.c    |  84 +++++++++++++++-
 src/backend/optimizer/prep/preptlist.c   |  14 ++-
 src/backend/optimizer/util/inherit.c     |  39 +++++++-
 src/backend/parser/analyze.c             |  46 +++++++--
 src/backend/parser/parse_relation.c      |  16 +++
 src/backend/partitioning/partbounds.c    |  15 ++-
 src/backend/rewrite/rewriteHandler.c     |   9 ++
 src/include/access/tableam.h             | 118 +++++++++++++++++++++--
 src/include/executor/executor.h          |   7 ++
 src/include/nodes/execnodes.h            |   1 +
 src/include/nodes/parsenodes.h           |  14 +++
 src/include/utils/rel.h                  |  14 +++
 25 files changed, 620 insertions(+), 67 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index a3414a76e8..5ef3a96c6a 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -180,7 +180,8 @@ static bool
 heapam_fetch_row_version(Relation relation,
 						 ItemPointer tid,
 						 Snapshot snapshot,
-						 TupleTableSlot *slot)
+						 TupleTableSlot *slot,
+						 Bitmapset *project_cols)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	Buffer		buffer;
@@ -348,7 +349,7 @@ static TM_Result
 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				  LockWaitPolicy wait_policy, uint8 flags,
-				  TM_FailureData *tmfd)
+				  TM_FailureData *tmfd, Bitmapset *project_cols)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index bd1685c441..60f664443d 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -1979,7 +1979,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 	indexInfo = BuildIndexInfo(btspool->index);
 	indexInfo->ii_Concurrent = btshared->isconcurrent;
 	scan = table_beginscan_parallel(btspool->heap,
-									ParallelTableScanFromBTShared(btshared));
+									ParallelTableScanFromBTShared(btshared), NULL);
 	reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
 									   true, progress, _bt_build_callback,
 									   (void *) &buildstate, scan);
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index b3d1a6c3f8..44f7125b81 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -172,7 +172,7 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
 }
 
 TableScanDesc
-table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan, Bitmapset *proj)
 {
 	Snapshot	snapshot;
 	uint32		flags = SO_TYPE_SEQSCAN |
@@ -194,6 +194,10 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
 		snapshot = SnapshotAny;
 	}
 
+	if (proj)
+		return relation->rd_tableam->scan_begin_with_column_projection(relation, snapshot, 0, NULL,
+											parallel_scan, flags, proj);
+
 	return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
 											parallel_scan, flags);
 }
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 62a09fb131..6274203e4b 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -85,7 +85,8 @@ static bool GetTupleForTrigger(EState *estate,
 							   LockTupleMode lockmode,
 							   TupleTableSlot *oldslot,
 							   TupleTableSlot **newSlot,
-							   TM_FailureData *tmfpd);
+							   TM_FailureData *tmfpd,
+							   Bitmapset *bms);
 static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
 						   Trigger *trigger, TriggerEvent event,
 						   Bitmapset *modifiedCols,
@@ -2738,7 +2739,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 
 		if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								LockTupleExclusive, slot, &epqslot_candidate,
-								NULL))
+								NULL, bms_make_singleton(0)))
 			return false;
 
 		/*
@@ -2795,6 +2796,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 		if (newtuple != trigtuple)
 			heap_freetuple(newtuple);
 	}
+
+    /* Make sure the new slot is not dependent on the original tuple */
+    ExecMaterializeSlot(slot);
+
 	if (should_free)
 		heap_freetuple(trigtuple);
 
@@ -2829,7 +2834,8 @@ ExecARDeleteTriggers(EState *estate,
 							   LockTupleExclusive,
 							   slot,
 							   NULL,
-							   NULL);
+							   NULL,
+							   bms_make_singleton(0));
 		else
 			ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
 
@@ -2994,7 +3000,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 		/* get a copy of the on-disk tuple we are planning to update */
 		if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								lockmode, oldslot, &epqslot_candidate,
-								tmfd))
+								tmfd, bms_make_singleton(0)))
 			return false;		/* cancel the update action */
 
 		/*
@@ -3092,6 +3098,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 			newtuple = NULL;
 		}
 	}
+
+    /* Make sure the new slot is not dependent on the original tuple */
+    ExecMaterializeSlot(newslot);
+
 	if (should_free_trig)
 		heap_freetuple(trigtuple);
 
@@ -3149,7 +3159,8 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 							   LockTupleExclusive,
 							   oldslot,
 							   NULL,
-							   NULL);
+							   NULL,
+							   bms_make_singleton(0));
 		else if (fdw_trigtuple != NULL)
 			ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
 		else
@@ -3304,7 +3315,8 @@ GetTupleForTrigger(EState *estate,
 				   LockTupleMode lockmode,
 				   TupleTableSlot *oldslot,
 				   TupleTableSlot **epqslot,
-				   TM_FailureData *tmfdp)
+				   TM_FailureData *tmfdp,
+				   Bitmapset *bms)
 {
 	Relation	relation = relinfo->ri_RelationDesc;
 
@@ -3328,7 +3340,7 @@ GetTupleForTrigger(EState *estate,
 								estate->es_output_cid,
 								lockmode, LockWaitBlock,
 								lockflags,
-								&tmfd);
+								&tmfd, bms_make_singleton(0));
 
 		/* Let the caller know about the status of this operation */
 		if (tmfdp)
@@ -3407,7 +3419,7 @@ GetTupleForTrigger(EState *estate,
 		 * suffices.
 		 */
 		if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
-										   oldslot))
+										   oldslot, bms_make_singleton(0)))
 			elog(ERROR, "failed to fetch tuple for trigger");
 	}
 
@@ -4335,7 +4347,7 @@ AfterTriggerExecute(EState *estate,
 				if (!table_tuple_fetch_row_version(src_rel,
 												   &(event->ate_ctid1),
 												   SnapshotAny,
-												   src_slot))
+												   src_slot, bms_make_singleton(0)))
 					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
 
 				/*
@@ -4377,7 +4389,7 @@ AfterTriggerExecute(EState *estate,
 				if (!table_tuple_fetch_row_version(dst_rel,
 												   &(event->ate_ctid2),
 												   SnapshotAny,
-												   dst_slot))
+												   dst_slot, bms_make_singleton(0)))
 					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
 
 				/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index ef2fd46092..c3c41ca569 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2673,7 +2673,7 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot)
 			/* ordinary table, fetch the tuple */
 			if (!table_tuple_fetch_row_version(erm->relation,
 											   (ItemPointer) DatumGetPointer(datum),
-											   SnapshotAny, slot))
+											   SnapshotAny, slot, bms_make_singleton(0)))
 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 			return true;
 		}
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index ac03271882..2e69515304 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -845,6 +845,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 						ExecInitQual((List *) clause, &mtstate->ps);
 				}
 			}
+
+			PopulateNeededColumnsForOnConflictUpdate(leaf_part_rri);
 		}
 	}
 
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 6014f2e248..de473dd98b 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -180,7 +180,8 @@ retry:
 							   lockmode,
 							   LockWaitBlock,
 							   0 /* don't follow updates */ ,
-							   &tmfd);
+							   &tmfd,
+							   bms_make_singleton(0));
 
 		PopActiveSnapshot();
 
@@ -357,7 +358,8 @@ retry:
 							   lockmode,
 							   LockWaitBlock,
 							   0 /* don't follow updates */ ,
-							   &tmfd);
+							   &tmfd,
+							   bms_make_singleton(0));
 
 		PopActiveSnapshot();
 
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 043bb83f55..0761baff4d 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -21,7 +21,8 @@
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "utils/memutils.h"
-
+#include "utils/rel.h"
+#include "nodes/nodeFuncs.h"
 
 
 /*
@@ -340,3 +341,110 @@ ExecScanReScan(ScanState *node)
 		}
 	}
 }
+
+typedef struct neededColumnContext
+{
+       Bitmapset **mask;       /* in/out: attribute numbers collected by the walker */
+       int ncol;               /* upper bound the walker Asserts varattno against */
+} neededColumnContext;
+
+static bool
+neededColumnContextWalker(Node *node, neededColumnContext *c)
+{
+       Var *var = (Var *) node;        /* only dereferenced after IsA() below */
+
+       /* Stop on an empty subtree or once the mask is already whole-row. */
+       if (node == NULL || contains_whole_row_col(*c->mask))
+               return false;
+       /* Only Vars contribute columns; recurse into everything else. */
+       if (!IsA(node, Var))
+               return expression_tree_walker(node, neededColumnContextWalker, (void *) c);
+       if (var->varattno == 0)
+       {
+               /* Whole-row Var: replace the mask with the singleton {0}. */
+               bms_free(*(c->mask));
+               *(c->mask) = bms_make_singleton(0);
+       }
+       else if (var->varattno > 0)
+       {
+               Assert(var->varattno <= c->ncol);
+               *(c->mask) = bms_add_member(*(c->mask), var->varattno);
+       }
+       return false;
+}
+
+/*
+ * Add the attribute numbers referenced by 'expr' to *scanCols.  'ncol' is
+ * the relation's column count, used for bounds-checking in the walker above.
+ */
+void
+PopulateNeededColumnsForNode(Node *expr, int ncol, Bitmapset **scanCols)
+{
+       neededColumnContext ctx = {
+               .mask = scanCols,
+               .ncol = ncol
+       };
+
+       neededColumnContextWalker(expr, &ctx);
+}
+
+
+/* Collect the columns a scan's plan node references (NULL if none). */
+Bitmapset *
+PopulateNeededColumnsForScan(ScanState *scanstate, int ncol)
+{
+       Plan       *node = scanstate->ps.plan;
+       Bitmapset  *cols = NULL;
+
+       PopulateNeededColumnsForNode((Node *) node->targetlist, ncol, &cols);
+       PopulateNeededColumnsForNode((Node *) node->qual, ncol, &cols);
+       /* Index and bitmap heap scans additionally carry their original quals. */
+       if (IsA(node, BitmapHeapScan))
+               PopulateNeededColumnsForNode((Node *) ((BitmapHeapScan *) node)->bitmapqualorig, ncol, &cols);
+       else if (IsA(node, IndexScan))
+       {
+               PopulateNeededColumnsForNode((Node *) ((IndexScan *) node)->indexqualorig, ncol, &cols);
+               PopulateNeededColumnsForNode((Node *) ((IndexScan *) node)->indexorderbyorig, ncol, &cols);
+       }
+       return cols;
+}
+
+/* Columns referenced by the qual of epqstate->plan (NULL if none). */
+Bitmapset *
+PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol)
+{
+       Bitmapset  *cols = NULL;
+
+       Assert(epqstate != NULL && epqstate->plan != NULL);
+       PopulateNeededColumnsForNode((Node *) epqstate->plan->qual, ncol, &cols);
+       return cols;
+}
+
+/*
+ * Collect the columns referenced by the ON CONFLICT WHERE clause, oc_ProjInfo
+ * and the WITH CHECK OPTION exprs, and store them in ri_onConflict->proj_cols.
+ */
+void
+PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo)
+{
+       OnConflictSetState *onconfl = resultRelInfo->ri_onConflict;
+       int natts = RelationGetDescr(resultRelInfo->ri_RelationDesc)->natts;
+       Bitmapset *cols = NULL;
+       ListCell *cell;
+
+       if (onconfl->oc_WhereClause && onconfl->oc_WhereClause->expr)
+               PopulateNeededColumnsForNode((Node *) onconfl->oc_WhereClause->expr,
+                                                                        natts, &cols);
+
+       if (onconfl->oc_ProjInfo)
+               PopulateNeededColumnsForNode((Node *) onconfl->oc_ProjInfo->pi_state.expr,
+                                                                        natts, &cols);
+
+       foreach(cell, resultRelInfo->ri_WithCheckOptionExprs)
+       {
+               ExprState  *wco = (ExprState *) lfirst(cell);
+
+               PopulateNeededColumnsForNode((Node *) wco->expr, natts, &cols);
+       }
+       onconfl->proj_cols = cols;
+}
\ No newline at end of file
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 5ef5c6930f..95de4cbb60 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -115,6 +115,16 @@ IndexNext(IndexScanState *node)
 								   node->iss_NumScanKeys,
 								   node->iss_NumOrderByKeys);
 
+        if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+        {
+                Bitmapset *proj = NULL;
+                Scan *planNode = (Scan *)node->ss.ps.plan;
+                int rti = planNode->scanrelid;
+                RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1);
+                proj = rte->scanCols;
+                table_index_fetch_set_column_projection(scandesc->xs_heapfetch, proj);
+        }
+
 		node->iss_ScanDesc = scandesc;
 
 		/*
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index a74813c7aa..10481b51a2 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -84,6 +84,7 @@ lnext:
 		int			lockflags = 0;
 		TM_Result	test;
 		TupleTableSlot *markSlot;
+		Bitmapset *epqCols = NULL;
 
 		/* clear any leftover test tuple for this rel */
 		markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
@@ -183,11 +184,15 @@ lnext:
 		if (!IsolationUsesXactSnapshot())
 			lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
 
+        epqCols = PopulateNeededColumnsForEPQ(&node->lr_epqstate,
+                                              RelationGetDescr(erm->relation)->natts);
+
 		test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
 								markSlot, estate->es_output_cid,
 								lockmode, erm->waitPolicy,
 								lockflags,
-								&tmfd);
+								&tmfd,
+								epqCols);
 
 		switch (test)
 		{
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index deda321502..400b75f109 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -345,7 +345,7 @@ ExecCheckTIDVisible(EState *estate,
 	if (!IsolationUsesXactSnapshot())
 		return;
 
-	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
+	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL))
 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
 	ExecCheckTupleVisible(estate, rel, tempSlot);
 	ExecClearTuple(tempSlot);
@@ -1421,6 +1421,7 @@ ldelete:;
 				{
 					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
+					Bitmapset *epqCols = NULL;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
@@ -1435,12 +1436,15 @@ ldelete:;
 					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 												 resultRelInfo->ri_RangeTableIndex);
 
+                    epqCols = PopulateNeededColumnsForEPQ(context->epqstate,
+                                                          RelationGetDescr(resultRelationDesc)->natts);
+
 					result = table_tuple_lock(resultRelationDesc, tupleid,
 											  estate->es_snapshot,
 											  inputslot, estate->es_output_cid,
 											  LockTupleExclusive, LockWaitBlock,
 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &context->tmfd);
+											  &context->tmfd, epqCols);
 
 					switch (result)
 					{
@@ -1573,8 +1577,23 @@ ldelete:;
 			}
 			else
 			{
+                RangeTblEntry *resultrte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex, estate);
+                Bitmapset *project_cols = resultrte->returningCols;
+                /*
+                 * XXX returningCols should never be empty if we have a RETURNING
+                 * clause. Right now, if we have a view, we fail to populate the
+                 * returningCols of its base table's RTE.
+                 * If we encounter such a situation now, for correctness, ensure
+                 * that we fetch all the columns.
+                 */
+                if(bms_is_empty(resultrte->returningCols))
+                {
+                    bms_free(resultrte->returningCols);
+                    project_cols = bms_make_singleton(0);
+                }
+
 				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
-												   SnapshotAny, slot))
+												   SnapshotAny, slot, project_cols))
 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
 			}
 		}
@@ -1724,7 +1743,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
 											   tupleid,
 											   SnapshotAny,
-											   oldSlot))
+											   oldSlot,
+											   bms_make_singleton(0) /* TODO */))
 				elog(ERROR, "failed to fetch tuple being updated");
 			/* and project the new tuple to retry the UPDATE with */
 			context->cpUpdateRetrySlot =
@@ -2237,6 +2257,7 @@ redo_act:
 					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
 					TupleTableSlot *oldSlot;
+					Bitmapset *epqCols = NULL;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
@@ -2250,12 +2271,15 @@ redo_act:
 					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 												 resultRelInfo->ri_RangeTableIndex);
 
+					epqCols = PopulateNeededColumnsForEPQ(context->epqstate,
+														  RelationGetDescr(resultRelationDesc)->natts);
+
 					result = table_tuple_lock(resultRelationDesc, tupleid,
 											  estate->es_snapshot,
 											  inputslot, estate->es_output_cid,
 											  updateCxt.lockmode, LockWaitBlock,
 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &context->tmfd);
+											  &context->tmfd, epqCols);
 
 					switch (result)
 					{
@@ -2280,7 +2304,8 @@ redo_act:
 							if (!table_tuple_fetch_row_version(resultRelationDesc,
 															   tupleid,
 															   SnapshotAny,
-															   oldSlot))
+															   oldSlot,
+															   bms_make_singleton(0) /* TODO */))
 								elog(ERROR, "failed to fetch tuple being updated");
 							slot = ExecGetUpdateNewTuple(resultRelInfo,
 														 epqslot, oldSlot);
@@ -2374,6 +2399,8 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 	Relation	relation = resultRelInfo->ri_RelationDesc;
 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
+    ProjectionInfo *oc_ProjInfo = resultRelInfo->ri_onConflict->oc_ProjInfo;
+    Bitmapset *proj_cols = resultRelInfo->ri_onConflict->proj_cols;
 	TM_FailureData tmfd;
 	LockTupleMode lockmode;
 	TM_Result	test;
@@ -2394,7 +2421,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 							context->estate->es_snapshot,
 							existing, context->estate->es_output_cid,
 							lockmode, LockWaitBlock, 0,
-							&tmfd);
+							&tmfd, proj_cols);
 	switch (test)
 	{
 		case TM_Ok:
@@ -2544,7 +2571,8 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 	}
 
 	/* Project the new tuple version */
-	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
+	ExecProject(oc_ProjInfo);
+	/* ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo); */
 
 	/*
 	 * Note that it is possible that the target tuple has been modified in
@@ -2701,7 +2729,8 @@ lmerge_matched:;
 	if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
 									   tupleid,
 									   SnapshotAny,
-									   resultRelInfo->ri_oldTupleSlot))
+									   resultRelInfo->ri_oldTupleSlot,
+									   bms_make_singleton(0) /* TODO */))
 		elog(ERROR, "failed to fetch the target tuple");
 
 	foreach(l, resultRelInfo->ri_matchedMergeAction)
@@ -2845,6 +2874,7 @@ lmerge_matched:;
 					TupleTableSlot *epqslot,
 							   *inputslot;
 					LockTupleMode lockmode;
+					Bitmapset *epqCols = NULL;
 
 					/*
 					 * The target tuple was concurrently updated by some other
@@ -2884,12 +2914,15 @@ lmerge_matched:;
 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
 												 resultRelInfo->ri_RangeTableIndex);
 
+					epqCols = PopulateNeededColumnsForEPQ(epqstate,
+														  RelationGetDescr(resultRelationDesc)->natts);
+
 					result = table_tuple_lock(resultRelationDesc, tupleid,
 											  estate->es_snapshot,
 											  inputslot, estate->es_output_cid,
 											  lockmode, LockWaitBlock,
 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &context->tmfd);
+											  &context->tmfd, epqCols);
 					switch (result)
 					{
 						case TM_Ok:
@@ -3725,7 +3758,8 @@ ExecModifyTable(PlanState *pstate)
 
 					if (!table_tuple_fetch_row_version(relation, tupleid,
 													   SnapshotAny,
-													   oldSlot))
+													   oldSlot,
+													   bms_make_singleton(0) /* TODO */))
 						elog(ERROR, "failed to fetch tuple being updated");
 				}
 				slot = internalGetUpdateNewTuple(resultRelInfo, context.planSlot,
@@ -4205,6 +4239,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 									&mtstate->ps);
 			onconfl->oc_WhereClause = qualexpr;
 		}
+		PopulateNeededColumnsForOnConflictUpdate(resultRelInfo);
 	}
 
 	/*
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 7b58cd9162..68609b4103 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -68,9 +68,22 @@ SeqNext(SeqScanState *node)
 		 * We reach here if the scan is not parallel, or if we're serially
 		 * executing a scan that was planned to be parallel.
 		 */
-		scandesc = table_beginscan(node->ss.ss_currentRelation,
-								   estate->es_snapshot,
-								   0, NULL);
+      	if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+      	{
+            Scan *planNode = (Scan *)node->ss.ps.plan;
+            int rti = planNode->scanrelid;
+            RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1);
+            scandesc = table_beginscan_with_column_projection(node->ss.ss_currentRelation,
+                                                              estate->es_snapshot,
+                                                              0, NULL,
+                                                              rte->scanCols);
+      	}
+      	else
+      	{
+            scandesc = table_beginscan(node->ss.ss_currentRelation,
+                                       estate->es_snapshot,
+                                       0, NULL);
+      	}
 		node->ss.ss_currentScanDesc = scandesc;
 	}
 
@@ -270,14 +283,21 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
 {
 	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+    Bitmapset *proj = NULL;
 
-	pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
-	table_parallelscan_initialize(node->ss.ss_currentRelation,
-								  pscan,
-								  estate->es_snapshot);
-	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
-	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+    pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
+
+    if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+    {
+        proj = PopulateNeededColumnsForScan(&node->ss,
+                                  			node->ss.ss_currentRelation->rd_att->natts);
+    }
+
+    table_parallelscan_initialize(node->ss.ss_currentRelation,
+                                  pscan,
+                                  estate->es_snapshot);
+    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
+    node->ss.ss_currentScanDesc = table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj);
 }
 
 /* ----------------------------------------------------------------
@@ -307,8 +327,19 @@ ExecSeqScanInitializeWorker(SeqScanState *node,
 							ParallelWorkerContext *pwcxt)
 {
 	ParallelTableScanDesc pscan;
-
-	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
-	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+   Bitmapset *proj = NULL;
+
+   /*
+    * FIXME: this duplicates work done in ExecSeqScanInitializeDSM. Once the
+    * plan itself carries the projection list, this overhead will go away.
+    */
+   if (table_scans_leverage_column_projection(node->ss.ss_currentRelation))
+   {
+           proj = PopulateNeededColumnsForScan(&node->ss,
+                                                node->ss.ss_currentRelation->rd_att->natts);
+   }
+
+   pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
+   node->ss.ss_currentScanDesc =
+           table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj);
 }
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index a1c6325d64..b3783eaf49 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -364,6 +364,7 @@ TidNext(TidScanState *node)
 	while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
 	{
 		ItemPointerData tid = tidList[node->tss_TidPtr];
+		Bitmapset *project_cols = NULL;
 
 		/*
 		 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
@@ -373,8 +374,15 @@ TidNext(TidScanState *node)
 		if (node->tss_isCurrentOf)
 			table_tuple_get_latest_tid(scan, &tid);
 
-		if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
-			return slot;
+        /*
+         * TODO: Remove this hack!! This should be done once at the start of the tid scan.
+         * Ideally we should probably set the list of projection cols in the
+         * generic scan desc, perhaps in TableScanDesc.
+         */
+        project_cols = PopulateNeededColumnsForScan((ScanState *) node,
+                                                    RelationGetDescr(heapRelation)->natts);
+
+        if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot, project_cols))			return slot;
 
 		/* Bad TID or failed snapshot qual; try next */
 		if (bBackward)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 8fc28007f5..ab1fcdd1f4 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -50,7 +50,7 @@
 #include "port/pg_bitutils.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
-
+#include "utils/rel.h"
 
 /* results of subquery_is_pushdown_safe */
 typedef struct pushdown_safety_info
@@ -145,6 +145,7 @@ static void recurse_push_qual(Node *setOp, Query *topquery,
 							  RangeTblEntry *rte, Index rti, Node *qual);
 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
 										   Bitmapset *extra_used_attrs);
+static void extract_scan_columns(PlannerInfo *root);
 
 
 /*
@@ -188,6 +189,7 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	 */
 	set_base_rel_sizes(root);
 
+	extract_scan_columns(root);
 	/*
 	 * We should now have size estimates for every actual table involved in
 	 * the query, and we also know which if any have been deleted from the
@@ -238,6 +240,86 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	return rel;
 }
 
+/*
+ * collect_scan_columns
+ *    Add to rte->scanCols the attribute number of every Var in 'expr' that
+ *    belongs to range table index 'rti'.
+ *
+ * Returns false as soon as a whole-row Var (varattno == 0) is found; in that
+ * case rte->scanCols has been replaced by the singleton {0} and the caller
+ * should stop collecting for this relation.  Returns true otherwise.
+ */
+static bool
+collect_scan_columns(Node *expr, int rti, RangeTblEntry *rte)
+{
+   /* TODO: suggest a default for vars_only to make maintenance less burdensome */
+   List *vars = pull_var_clause(expr,
+                                PVC_RECURSE_AGGREGATES |
+                                PVC_RECURSE_WINDOWFUNCS |
+                                PVC_RECURSE_PLACEHOLDERS);
+   bool keep_going = true;
+   ListCell *lc;
+
+   foreach(lc, vars)
+   {
+      Var *var = lfirst(lc);
+      if (var->varno != rti)
+         continue;
+      if (var->varattno > 0)
+         rte->scanCols = bms_add_member(rte->scanCols, var->varattno);
+      else if (var->varattno == 0)
+      {
+         /* If there is a whole-row var, we have to fetch the whole row. */
+         bms_free(rte->scanCols);
+         rte->scanCols = bms_make_singleton(0);
+         keep_going = false;
+         break;
+      }
+   }
+   /* Free only the list cells; the Vars themselves still belong to 'expr'. */
+   list_free(vars);
+   return keep_going;
+}
+
+/*
+ * extract_scan_columns
+ *    For every non-dummy relation in the planner's simple_rel_array, compute
+ *    rte->scanCols from the Vars in the relation's target list
+ *    (rel->reltarget->exprs) and its restriction clauses
+ *    (rel->baserestrictinfo).  A whole-row reference yields the set {0}.
+ */
+static void
+extract_scan_columns(PlannerInfo *root)
+{
+   for (int i = 1; i < root->simple_rel_array_size; i++)
+   {
+      RangeTblEntry *rte = root->simple_rte_array[i];
+      RelOptInfo    *rel = root->simple_rel_array[i];
+      bool keep_going = true;
+      ListCell *lc;
+
+      if (rte == NULL || rel == NULL || IS_DUMMY_REL(rel))
+         continue;
+
+      rte->scanCols = NULL;
+      foreach(lc, rel->reltarget->exprs)
+      {
+         keep_going = collect_scan_columns((Node *) lfirst(lc), i, rte);
+         if (!keep_going)
+            break;
+      }
+      if (!keep_going)
+         continue;      /* whole row already required; skip the quals */
+      foreach(lc, rel->baserestrictinfo)
+      {
+         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+         if (!collect_scan_columns((Node *) rinfo->clause, i, rte))
+            break;
+      }
+   }
+}
+
 /*
  * set_base_rel_consider_startup
  *	  Set the consider_[param_]startup flags for each base-relation entry.
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 137b28323d..869e67a00b 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -267,8 +267,20 @@ preprocess_targetlist(PlannerInfo *root)
 	 * to make these Vars available for the RETURNING calculation.  Vars that
 	 * belong to the result rel don't need to be added, because they will be
 	 * made to refer to the actual heap tuple.
+     *
+     * XXX: Skip this step when there is no result relation, so that columns
+     * referenced only by RETURNING do not inflate scanCols, which is derived
+     * from the RelOptInfo->reltarget exprs.  This matters for queries such as:
+     *   delete from base_tbl using other_tbl t
+     *     where base_tbl.col1 = t.col1 returning *;
+     * where base_tbl is the root table of an inheritance hierarchy.
+     * TODO: Delete the result_relation guard below if and when
+     * inheritance_planner() is refactored to not fake a round of planning
+     * pretending we have a SELECT query (which causes result_relation to be 0
+     * in the first place).
 	 */
-	if (parse->returningList && list_length(parse->rtable) > 1)
+	/* if (parse->returningList && list_length(parse->rtable) > 1) */
+	if (result_relation && parse->returningList && list_length(parse->rtable) > 1)
 	{
 		List	   *vars;
 		ListCell   *l;
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index cf7691a474..2f3ded4c30 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -49,7 +49,8 @@ static Bitmapset *translate_col_privs(const Bitmapset *parent_privs,
 									  List *translated_vars);
 static void expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel,
 									  RangeTblEntry *rte, Index rti);
-
+static Bitmapset *translate_parent_cols(Bitmapset *parent_cols, List *translated_vars,
+                                        Relation parent_rel);
 
 /*
  * expand_inherited_rtentry
@@ -539,6 +540,17 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	childrte->alias = childrte->eref = makeAlias(parentrte->eref->aliasname,
 												 child_colnames);
 
+	/*
+	 * Carry the parent's RETURNING column set over to the child RTE.  When
+	 * the "child" is the parent itself a plain copy is taken; otherwise the
+	 * attnos are mapped through appinfo->translated_vars.
+	 */
+	if (childOID == parentOID)
+		childrte->returningCols = bms_copy(parentrte->returningCols);
+	else
+		childrte->returningCols =
+			translate_parent_cols(parentrte->returningCols,
+								  appinfo->translated_vars, parentrel);
+
 	/*
 	 * Translate the column permissions bitmaps to the child's attnums (we
 	 * have to build the translated_vars list before we can do this).  But if
@@ -648,6 +660,31 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	}
 }
 
+/*
+ * translate_parent_cols
+ *	  Map parent attnos to child attnos via translated_vars.  A whole-row
+ *	  reference (0) is expanded to all parent columns; NULL entries are skipped.
+ */
+static Bitmapset *
+translate_parent_cols(Bitmapset *parent_cols, List *translated_vars,
+					  Relation parent_rel)
+{
+	Bitmapset  *expanded = NULL;	/* set we built for the whole-row case */
+	Bitmapset  *result = NULL;
+	int			col = -1;
+
+	if (contains_whole_row_col(parent_cols))
+		parent_cols = expanded = get_ordinal_attnos(parent_rel);
+	while ((col = bms_next_member(parent_cols, col)) >= 0)
+	{
+		Var		   *var = (Var *) list_nth(translated_vars, col - 1);
+		if (var != NULL)
+			result = bms_add_member(result, var->varattno);
+	}
+	bms_free(expanded);			/* no-op unless the whole-row set was built */
+	return result;
+}
+
 /*
  * translate_col_privs
  *	  Translate a bitmapset representing per-column privileges from the
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 6688c2a865..ec61a7e675 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -73,7 +73,7 @@ static void determineRecursiveColTypes(ParseState *pstate,
 									   Node *larg, List *nrtargetlist);
 static Query *transformReturnStmt(ParseState *pstate, ReturnStmt *stmt);
 static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt);
-static List *transformReturningList(ParseState *pstate, List *returningList);
+static void transformReturningList(ParseState *pstate, Query *qry, List *returningList);
 static Query *transformPLAssignStmt(ParseState *pstate,
 									PLAssignStmt *stmt);
 static Query *transformDeclareCursorStmt(ParseState *pstate,
@@ -514,7 +514,8 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
 	qual = transformWhereClause(pstate, stmt->whereClause,
 								EXPR_KIND_WHERE, "WHERE");
 
-	qry->returningList = transformReturningList(pstate, stmt->returningList);
+	/* transformReturningList() stores its result in qry->returningList */
+	transformReturningList(pstate, qry, stmt->returningList);
 
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
@@ -931,10 +932,10 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 		qry->onConflict = transformOnConflictClause(pstate,
 													stmt->onConflictClause);
 
-	/* Process RETURNING, if any. */
+	/* Process RETURNING, if any; the result lands in qry->returningList. */
 	if (stmt->returningList)
-		qry->returningList = transformReturningList(pstate,
-													stmt->returningList);
+		transformReturningList(pstate, qry,
+								stmt->returningList);
 
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
@@ -2396,7 +2397,8 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
 	qual = transformWhereClause(pstate, stmt->whereClause,
 								EXPR_KIND_WHERE, "WHERE");
 
-	qry->returningList = transformReturningList(pstate, stmt->returningList);
+	/* transformReturningList() stores its result in qry->returningList */
+	transformReturningList(pstate, qry, stmt->returningList);
 
 	/*
 	 * Now we are done with SELECT-like processing, and can get on with
@@ -2491,14 +2493,16 @@ transformUpdateTargetList(ParseState *pstate, List *origTlist)
  * transformReturningList -
  *	handle a RETURNING clause in INSERT/UPDATE/DELETE
  */
-static List *
-transformReturningList(ParseState *pstate, List *returningList)
+static void
+transformReturningList(ParseState *pstate, Query *qry, List *returningList)
 {
 	List	   *rlist;
 	int			save_next_resno;
+    List       *vars;
+    ListCell   *l;
 
 	if (returningList == NIL)
-		return NIL;				/* nothing to do */
+		return;					/* nothing to do */
 
 	/*
 	 * We need to assign resnos starting at one in the RETURNING list. Save
@@ -2511,6 +2515,27 @@ transformReturningList(ParseState *pstate, List *returningList)
 	/* transform RETURNING identically to a SELECT targetlist */
 	rlist = transformTargetList(pstate, returningList, EXPR_KIND_RETURNING);
 
+    vars = pull_var_clause((Node *) rlist,
+                            PVC_RECURSE_AGGREGATES |
+                            PVC_RECURSE_WINDOWFUNCS |
+                            PVC_INCLUDE_PLACEHOLDERS);
+	foreach(l, vars)
+	{
+		Var		   *var = (Var *) lfirst(l);
+		RangeTblEntry *rte = (RangeTblEntry *) list_nth(pstate->p_rtable, var->varno - 1);
+		/* An RTE already marked whole-row keeps its {0} singleton untouched. */
+		if (bms_is_member(0, rte->returningCols))
+			continue;
+		if (var->varattno > 0)
+			rte->returningCols = bms_add_member(rte->returningCols, var->varattno);
+		else if (var->varattno == 0)
+		{
+			/* Whole-row Var: fetch the whole row, but keep visiting other Vars. */
+			bms_free(rte->returningCols);
+			rte->returningCols = bms_make_singleton(0);
+		}
+	}
+
 	/*
 	 * Complain if the nonempty tlist expanded to nothing (which is possible
 	 * if it contains only a star-expansion of a zero-column table).  If we
@@ -2534,7 +2559,8 @@ transformReturningList(ParseState *pstate, List *returningList)
 	/* restore state */
 	pstate->p_next_resno = save_next_resno;
 
-	return rlist;
+	qry->returningList = rlist;
+	/* (the list is handed back through qry, not as a return value) */
 }
 
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index f6b740df0a..6add3feaf9 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1467,7 +1467,9 @@ addRangeTableEntry(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -1555,7 +1557,9 @@ addRangeTableEntryForRelation(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -1656,7 +1660,9 @@ addRangeTableEntryForSubquery(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -1986,7 +1992,9 @@ addRangeTableEntryForFunction(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2079,7 +2087,9 @@ addRangeTableEntryForTableFunc(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2166,7 +2176,9 @@ addRangeTableEntryForValues(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2263,7 +2275,9 @@ addRangeTableEntryForJoin(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
@@ -2413,7 +2427,9 @@ addRangeTableEntryForCTE(ParseState *pstate,
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	/*
 	 * Add completed RTE to pstate's range table list, so that we know its
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 091d6e886b..5f53690ec7 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -3322,6 +3322,7 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 		TableScanDesc scan;
 		MemoryContext oldCxt;
 		TupleTableSlot *tupslot;
+		Bitmapset *proj = NULL;
 
 		/* Lock already taken above. */
 		if (part_relid != RelationGetRelid(default_rel))
@@ -3386,7 +3387,16 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 		econtext = GetPerTupleExprContext(estate);
 		snapshot = RegisterSnapshot(GetLatestSnapshot());
 		tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
-		scan = table_beginscan(part_rel, snapshot, 0, NULL);
+		/* Ask the AM for a projection derived from the partition qual, if it can. */
+		if (table_scans_leverage_column_projection(part_rel))
+		{
+			PopulateNeededColumnsForNode((Node *) partqualstate->expr,
+										 tupslot->tts_tupleDescriptor->natts, &proj);
+			scan = table_beginscan_with_column_projection(part_rel, snapshot,
+														  0, NULL, proj);
+		}
+		else
+			scan = table_beginscan(part_rel, snapshot, 0, NULL);
 
 		/*
 		 * Switch to per-tuple memory context and reset it for each tuple
@@ -3417,6 +3427,9 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 
 		if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
 			table_close(part_rel, NoLock);	/* keep the lock until commit */
+
+		/* Drop this partition's projection set; bms_free() is a no-op on NULL. */
+		bms_free(proj);
 	}
 }
 
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 29ae27e5e3..0a636c44b5 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -1759,7 +1759,9 @@ ApplyRetrieveRule(Query *parsetree,
 			rte->selectedCols = NULL;
 			rte->insertedCols = NULL;
 			rte->updatedCols = NULL;
+			rte->returningCols = NULL;
 			rte->extraUpdatedCols = NULL;
+			rte->scanCols = NULL;
 
 			/*
 			 * For the most part, Vars referencing the view should remain as
@@ -1860,14 +1862,18 @@ ApplyRetrieveRule(Query *parsetree,
 	subrte->selectedCols = rte->selectedCols;
 	subrte->insertedCols = rte->insertedCols;
 	subrte->updatedCols = rte->updatedCols;
+	subrte->returningCols = rte->returningCols;
 	subrte->extraUpdatedCols = rte->extraUpdatedCols;
+	subrte->scanCols = rte->scanCols;
 
 	rte->requiredPerms = 0;		/* no permission check on subquery itself */
 	rte->checkAsUser = InvalidOid;
 	rte->selectedCols = NULL;
 	rte->insertedCols = NULL;
 	rte->updatedCols = NULL;
+	rte->returningCols = NULL;
 	rte->extraUpdatedCols = NULL;
+	rte->scanCols = NULL;
 
 	return parsetree;
 }
@@ -3218,6 +3224,7 @@ rewriteTargetView(Query *parsetree, Relation view)
 	 * base_rte instead of copying it.
 	 */
 	new_rte = base_rte;
+	new_rte->returningCols = bms_copy(view_rte->returningCols);
 	new_rte->rellockmode = RowExclusiveLock;
 
 	parsetree->rtable = lappend(parsetree->rtable, new_rte);
@@ -3576,6 +3583,8 @@ rewriteTargetView(Query *parsetree, Relation view)
 		}
 	}
 
+	new_rte->returningCols = bms_copy(view_rte->returningCols);
+
 	table_close(base_rel, NoLock);
 
 	return parsetree;
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 9df4e7cb0a..fb538fe4e7 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -267,6 +267,7 @@ typedef struct TableAmRoutine
 {
 	/* this must be set to T_TableAmRoutine */
 	NodeTag		type;
+	bool scans_leverage_column_projection;
 
 
 	/* ------------------------------------------------------------------------
@@ -307,6 +308,30 @@ typedef struct TableAmRoutine
 								 ParallelTableScanDesc pscan,
 								 uint32 flags);
 
+    /*
+     * Variant of scan_begin() with a column projection bitmap that lists the
+     * ordinal attribute numbers to be fetched during the scan.
+     *
+     * If project_columns is an empty bitmap, none of the data columns are to be
+     * fetched.
+     *
+     * If project_columns is a singleton bitmap with a whole-row reference (0),
+     * all of the data columns are to be fetched.
+     *
+     * Please note: project_columns only deals with non-system columns (attnum >= 0)
+     *
+     * Please note: Due to the limitations of the slot_get***() APIs, the
+     * scan_getnextslot() tableAM call must return a TupleTableSlot that is densely
+     * populated (missing cols indicated with isnull = true up to the largest
+     * attno in the projection list)
+     */
+    TableScanDesc (*scan_begin_with_column_projection) (Relation relation,
+                                                        Snapshot snapshot,
+                                                        int nkeys, struct ScanKeyData *key,
+                                                        ParallelTableScanDesc parallel_scan,
+                                                        uint32 flags,
+                                                        Bitmapset *project_columns);
+
 	/*
 	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
 	 * TableScanDesc.rs_snapshot needs to be unregistered.
@@ -410,6 +435,26 @@ typedef struct TableAmRoutine
 	 */
 	void		(*index_fetch_end) (struct IndexFetchTableData *data);
 
+    /*
+     * Set up a column projection list that can be used by index_fetch_tuple()
+     * to fetch a subset of columns for a tuple.
+     *
+     * If project_columns is an empty bitmap, none of the data columns are to be
+     * fetched.
+     *
+     * If project_columns is a singleton bitmap with a whole-row reference (0),
+     * all of the data columns are to be fetched.
+     *
+     * Please note: project_columns only deals with non system columns (attnum >= 0)
+     *
+     * Please note: Due to the limitations of the slot_get***() APIs,
+     * index_fetch_tuple() must return a TupleTableSlot that is densely
+     * populated (missing cols indicated with isnull = true up to the largest
+     * attno in the projection list)
+     */
+    void (*index_fetch_set_column_projection) (struct IndexFetchTableData *data,
+                                               Bitmapset *project_columns);
+
 	/*
 	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
 	 * according to `snapshot`. If a tuple was found and passed the visibility
@@ -446,11 +491,27 @@ typedef struct TableAmRoutine
 	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
 	 * according to `snapshot`. If a tuple was found and passed the visibility
 	 * test, returns true, false otherwise.
+     *
+     * project_cols is a set of columns to be fetched for the given row.
+     *
+     * If project_cols is an empty bitmap, none of the data columns are to be
+     * fetched.
+     *
+     * If project_cols is a singleton bitmap with a whole-row reference (0),
+     * all of the data columns are to be fetched.
+     *
+     * Please note: project_cols only deals with non system columns (attnum >= 0)
+     *
+     * Please note: Due to the limitations of the slot_get***() APIs,
+     * tuple_fetch_row_version() must return a TupleTableSlot that is densely
+     * populated (missing cols indicated with isnull = true up to the largest
+     * attno in the projection list)
 	 */
 	bool		(*tuple_fetch_row_version) (Relation rel,
 											ItemPointer tid,
 											Snapshot snapshot,
-											TupleTableSlot *slot);
+											TupleTableSlot *slot,
+											Bitmapset *project_cols);
 
 	/*
 	 * Is tid valid for a scan of this relation.
@@ -537,7 +598,8 @@ typedef struct TableAmRoutine
 							   LockTupleMode mode,
 							   LockWaitPolicy wait_policy,
 							   uint8 flags,
-							   TM_FailureData *tmfd);
+							   TM_FailureData *tmfd,
+							   Bitmapset *project_cols);
 
 	/*
 	 * Perform operations necessary to complete insertions made via
@@ -891,6 +953,25 @@ table_beginscan(Relation rel, Snapshot snapshot,
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
+/* Start a seqscan, passing project_column to the AM's projecting scan_begin. */
+static inline TableScanDesc
+table_beginscan_with_column_projection(Relation relation, Snapshot snapshot,
+									   int nkeys, struct ScanKeyData *key,
+									   Bitmapset *project_column)
+{
+	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	Assert(relation->rd_tableam->scans_leverage_column_projection);
+	return relation->rd_tableam->scan_begin_with_column_projection(relation,
+		snapshot, nkeys, key, NULL, flags, project_column);
+}
+
+static inline bool
+table_scans_leverage_column_projection(Relation relation)
+{
+    return relation->rd_tableam->scans_leverage_column_projection;
+}
+
 /*
  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
  * snapshot appropriate for scanning catalog relations.
@@ -1134,7 +1215,8 @@ extern void table_parallelscan_initialize(Relation rel,
  * Caller must hold a suitable lock on the relation.
  */
 extern TableScanDesc table_beginscan_parallel(Relation rel,
-											  ParallelTableScanDesc pscan);
+											  ParallelTableScanDesc pscan,
+											  Bitmapset *proj);
 
 /*
  * Restart a parallel scan.  Call this in the leader process.  Caller is
@@ -1184,6 +1266,13 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
 	scan->rel->rd_tableam->index_fetch_end(scan);
 }
 
+static inline void
+table_index_fetch_set_column_projection(struct IndexFetchTableData *scan,
+                                        Bitmapset *project_column)
+{
+    scan->rel->rd_tableam->index_fetch_set_column_projection(scan, project_column);
+}
+
 /*
  * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
  * a visibility test according to `snapshot`. If a tuple was found and passed
@@ -1259,7 +1348,8 @@ static inline bool
 table_tuple_fetch_row_version(Relation rel,
 							  ItemPointer tid,
 							  Snapshot snapshot,
-							  TupleTableSlot *slot)
+							  TupleTableSlot *slot,
+							  Bitmapset *project_cols)
 {
 	/*
 	 * We don't expect direct calls to table_tuple_fetch_row_version with
@@ -1269,7 +1359,7 @@ table_tuple_fetch_row_version(Relation rel,
 	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
 		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
 
-	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
+	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot, project_cols);
 }
 
 /*
@@ -1530,6 +1620,20 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
  *		also lock descendant tuples if lock modes don't conflict.
  *		If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
  *		latest version.
+ *     project_cols: It is a set of columns to be fetched for the tuple being locked.
+ *
+ *      If project_cols is an empty bitmap, none of the data columns are to be
+ *      fetched.
+ *
+ *      If project_cols is a singleton bitmap with a whole-row reference (0),
+ *      all of the data columns are to be fetched.
+ *
+ *      Please note: project_cols only deals with non system columns (attnum >= 0)
+ *
+ *      Please note: Due to the limitations of the slot_get***() APIs,
+ *      tuple_lock() must return a TupleTableSlot that is densely
+ *      populated (missing cols indicated with isnull = true up to the largest
+ *      attno in the projection list)
  *
  * Output parameters:
  *	*slot: contains the target tuple
@@ -1551,11 +1655,11 @@ static inline TM_Result
 table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
 				 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				 LockWaitPolicy wait_policy, uint8 flags,
-				 TM_FailureData *tmfd)
+				 TM_FailureData *tmfd, Bitmapset *project_cols)
 {
 	return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
 									   cid, mode, wait_policy,
-									   flags, tmfd);
+									   flags, tmfd, project_cols);
 }
 
 /*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index d68a6b9d28..651d294421 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -651,6 +651,13 @@ extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd);
 extern void CheckSubscriptionRelkind(char relkind, const char *nspname,
 									 const char *relname);
 
+extern void
+	PopulateNeededColumnsForNode(Node *expr, int ncol, Bitmapset **scanCols);
+extern Bitmapset *
+	PopulateNeededColumnsForScan(ScanState *scanstate, int ncol);
+extern Bitmapset *PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol);
+extern void PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo);
+
 /*
  * prototypes from functions in nodeModifyTable.c
  */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 01b1727fc0..bace99d311 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -402,6 +402,7 @@ typedef struct OnConflictSetState
 	TupleTableSlot *oc_ProjSlot;	/* CONFLICT ... SET ... projection target */
 	ProjectionInfo *oc_ProjInfo;	/* for ON CONFLICT DO UPDATE SET */
 	ExprState  *oc_WhereClause; /* state for the WHERE clause */
+	Bitmapset  *proj_cols; /* cols to be scanned during the operation */
 } OnConflictSetState;
 
 /* ----------------
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index b376031856..98a7cc2fdf 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1181,6 +1181,20 @@ typedef struct RangeTblEntry
 	Bitmapset  *updatedCols;	/* columns needing UPDATE permission */
 	Bitmapset  *extraUpdatedCols;	/* generated columns being updated */
 	List	   *securityQuals;	/* security barrier quals to apply, if any */
+
+    /*
+     * scanCols: Columns to be retrieved during a physical scan.
+     * returningCols: Columns to be retrieved to satisfy the RETURNING clause.
+     *
+     * Please note: These bitmaps only deal with non-system columns (attnum >= 0)
+     *
+     * These bitmaps have some special values:
+     * - A singleton bitmap with the element 0 indicates that all non-system
+     *   columns must be fetched.
+     * - An empty bitmap indicates that no non-system column must be fetched.
+     */
+    Bitmapset  *scanCols;           /* columns to be fetched during a physical scan */
+    Bitmapset  *returningCols;      /* columns in the RETURNING clause */
 } RangeTblEntry;
 
 /*
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 7dc401cf0d..6d709a98e1 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -703,6 +703,20 @@ RelationGetSmgr(Relation rel)
 	 (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&	\
 	 !IsCatalogRelation(relation))
 
+/* True if cols holds attribute number 0, the whole-row marker. */
+static inline bool
+contains_whole_row_col(Bitmapset *cols)
+{
+	return bms_is_member(0, cols);
+}
+
+/* Build a bitmap of attribute numbers 1..natts from rel's tuple descriptor. */
+static inline Bitmapset *
+get_ordinal_attnos(Relation rel)
+{
+	return bms_add_range(NULL, 1, RelationGetDescr(rel)->natts);
+}
+
 /* routines in utils/cache/relcache.c */
 extern void RelationIncrementReferenceCount(Relation rel);
 extern void RelationDecrementReferenceCount(Relation rel);
-- 
2.25.1

