From fd6fb3028c1c9f7fcb41d651a324b1b1eb4ab2ce Mon Sep 17 00:00:00 2001
From: kommih <haribabuk@fast.au.fujitsu.com>
Date: Wed, 29 Aug 2018 13:52:39 +1000
Subject: [PATCH 2/2] copy memory limit fix

To limit the memory used by COPY FROM after slotification, calculate
the data size of the first tuple in each batch and use it as the size
estimate for the remaining tuples in that batch, so that the running
total approximates the memory used by the COPY command.
---
 src/backend/commands/copy.c | 61 ++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 22 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index c9272b344a..1e2d5ebb50 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -308,7 +308,7 @@ static void CopyFromInsertBatch(CopyState cstate, EState *estate,
 					CommandId mycid, int hi_options,
 					ResultRelInfo *resultRelInfo,
 					BulkInsertState bistate,
-					int nBufferedTuples, TupleTableSlot **bufferedSlots,
+					int nBufferedSlots, TupleTableSlot **bufferedSlots,
 					uint64 firstBufferedLineNo);
 static bool CopyReadLine(CopyState cstate);
 static bool CopyReadLineText(CopyState cstate);
@@ -2309,11 +2309,12 @@ CopyFrom(CopyState cstate)
 	void       *bistate;
 	uint64		processed = 0;
 	bool		useHeapMultiInsert;
-	int			nBufferedTuples = 0;
+	int			nBufferedSlots = 0;
 	int			prev_leaf_part_index = -1;
 
-#define MAX_BUFFERED_TUPLES 1000
+#define MAX_BUFFERED_SLOTS 1000
 	TupleTableSlot  **bufferedSlots = NULL;	/* initialize to silence warning */
+	Size		bufferedSlotsSize = 0;
 	uint64		firstBufferedLineNo = 0;
 
 	Assert(cstate->rel);
@@ -2524,7 +2525,7 @@ CopyFrom(CopyState cstate)
 	else
 	{
 		useHeapMultiInsert = true;
-		bufferedSlots = palloc0(MAX_BUFFERED_TUPLES * sizeof(TupleTableSlot *));
+		bufferedSlots = palloc0(MAX_BUFFERED_SLOTS * sizeof(TupleTableSlot *));
 	}
 
 	/*
@@ -2562,7 +2563,7 @@ CopyFrom(CopyState cstate)
 
 		CHECK_FOR_INTERRUPTS();
 
-		if (nBufferedTuples == 0)
+		if (nBufferedSlots == 0)
 		{
 			/*
 			 * Reset the per-tuple exprcontext. We can only do this if the
@@ -2577,14 +2578,14 @@ CopyFrom(CopyState cstate)
 			myslot = singleslot;
 			Assert(myslot != NULL);
 		}
-		else if (bufferedSlots[nBufferedTuples] == NULL)
+		else if (bufferedSlots[nBufferedSlots] == NULL)
 		{
 			myslot = table_gimmegimmeslot(resultRelInfo->ri_RelationDesc,
 										  &estate->es_tupleTable);
-			bufferedSlots[nBufferedTuples] = myslot;
+			bufferedSlots[nBufferedSlots] = myslot;
 		}
 		else
-			myslot = bufferedSlots[nBufferedTuples];
+			myslot = bufferedSlots[nBufferedSlots];
 
 		/* Switch into its memory context */
 		MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -2750,27 +2751,43 @@ CopyFrom(CopyState cstate)
 
 				if (useHeapMultiInsert)
 				{
+					Size		tup_size;
+
 					/* Add this tuple to the tuple buffer */
-					if (nBufferedTuples == 0)
+					if (nBufferedSlots == 0)
+					{
 						firstBufferedLineNo = cstate->cur_lineno;
-					Assert(bufferedSlots[nBufferedTuples] == myslot);
-					nBufferedTuples++;
+						/*
+						 * Size only the first tuple of a batch; each later one
+						 * is charged that same estimate.  A small first tuple
+						 * ahead of large ones is undercounted, but that is rare.
+						 */
+						tup_size = heap_compute_data_size(myslot->tts_tupleDescriptor,
+														  myslot->tts_values,
+														  myslot->tts_isnull);
+					}
+					else		/* total / count == the first tuple's size */
+						tup_size = bufferedSlotsSize / nBufferedSlots;
+
+					Assert(bufferedSlots[nBufferedSlots] == myslot);
+					nBufferedSlots++;
+					bufferedSlotsSize += tup_size;
 
 					/*
 					 * If the buffer filled up, flush it.  Also flush if the
 					 * total size of all the tuples in the buffer becomes
 					 * large, to avoid using large amounts of memory for the
 					 * buffer when the tuples are exceptionally wide.
-					 *
-					 * PBORKED: Re-introduce size limit
 					 */
-					if (nBufferedTuples == MAX_BUFFERED_TUPLES)
+					if (nBufferedSlots == MAX_BUFFERED_SLOTS ||
+						bufferedSlotsSize > 65535)
 					{
 						CopyFromInsertBatch(cstate, estate, mycid, hi_options,
 											resultRelInfo, bistate,
-											nBufferedTuples, bufferedSlots,
+											nBufferedSlots, bufferedSlots,
 											firstBufferedLineNo);
-						nBufferedTuples = 0;
+						nBufferedSlots = 0;
+						bufferedSlotsSize = 0;
 					}
 				}
 				else
@@ -2836,10 +2853,10 @@ next_tuple:
 	}
 
 	/* Flush any remaining buffered tuples */
-	if (nBufferedTuples > 0)
+	if (nBufferedSlots > 0)
 		CopyFromInsertBatch(cstate, estate, mycid, hi_options,
 							resultRelInfo, bistate,
-							nBufferedTuples, bufferedSlots,
+							nBufferedSlots, bufferedSlots,
 							firstBufferedLineNo);
 
 	/* Done, clean up */
@@ -2899,7 +2916,7 @@ next_tuple:
 static void
 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
 					int hi_options, ResultRelInfo *resultRelInfo,
-					BulkInsertState bistate, int nBufferedTuples, TupleTableSlot **bufferedSlots,
+					BulkInsertState bistate, int nBufferedSlots, TupleTableSlot **bufferedSlots,
 					uint64 firstBufferedLineNo)
 {
 	MemoryContext oldcontext;
@@ -2920,7 +2937,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
 	oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 	table_multi_insert(cstate->rel,
 					   bufferedSlots,
-					   nBufferedTuples,
+					   nBufferedSlots,
 					   mycid,
 					   hi_options,
 					   bistate);
@@ -2932,7 +2949,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
 	 */
 	if (resultRelInfo->ri_NumIndices > 0)
 	{
-		for (i = 0; i < nBufferedTuples; i++)
+		for (i = 0; i < nBufferedSlots; i++)
 		{
 			List	   *recheckIndexes;
 
@@ -2954,7 +2971,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
 			 (resultRelInfo->ri_TrigDesc->trig_insert_after_row ||
 			  resultRelInfo->ri_TrigDesc->trig_insert_new_table))
 	{
-		for (i = 0; i < nBufferedTuples; i++)
+		for (i = 0; i < nBufferedSlots; i++)
 		{
 			cstate->cur_lineno = firstBufferedLineNo + i;
 			ExecARInsertTriggers(estate, resultRelInfo,
-- 
2.18.0.windows.1

