From c39f3ef547d03e02f2dfca6d011ca65aa7e6c8c3 Mon Sep 17 00:00:00 2001
From: Andrey <amborodin@acm.org>
Date: Sun, 25 Aug 2019 14:18:31 +0500
Subject: [PATCH 3/3] Implement GiST build using sort support

---
 src/backend/access/gist/gistbuild.c | 204 +++++++++++++++++++++++++++-
 src/backend/access/gist/gistutil.c  |  30 +++-
 src/backend/utils/sort/tuplesort.c  |  82 +++++++++++
 src/include/access/gist_private.h   |   3 +
 src/include/utils/tuplesort.h       |   6 +
 5 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index e7112590b8..b49875e731 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -31,6 +31,7 @@
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/regproc.h"
+#include "utils/tuplesort.h"
 
 /* Step of index tuples for check whether to switch to buffering build mode */
 #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
@@ -56,6 +57,15 @@ typedef enum
 	GIST_BUFFERING_ACTIVE		/* in buffering build mode */
 } GistBufferingMode;
 
+/*
+ * Status record for spooling/sorting phase.
+ */
+typedef struct
+{
+	Tuplesortstate *sortstate;	/* state data for tuplesort.c */
+	Relation	index;
+} GSpool;
+
 /* Working state for gistbuild and its callback */
 typedef struct
 {
@@ -77,7 +87,7 @@ typedef struct
 	HTAB	   *parentMap;
 
 	GistBufferingMode bufferingMode;
-	FmgrInfo	sortFn;
+	GSpool *spool;
 } GISTBuildState;
 
 /* prototypes for private functions */
@@ -111,6 +121,173 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
 static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
 static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
 
+static GSpool *gist_spoolinit(Relation heap, Relation index, SortSupport ssup);
+static void gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull);
+static void gist_indexsortbuild(GISTBuildState *state);
+static void gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew);
+
+/*
+ * create and initialize a spool structure to sort tuples
+ */
+static GSpool *
+gist_spoolinit(Relation heap, Relation index, SortSupport ssup)
+{
+	GSpool	   *gspool = (GSpool *) palloc0(sizeof(GSpool));
+
+	gspool->index = index;
+
+	/*
+	 * We size the sort area as maintenance_work_mem rather than work_mem to
+	 * speed index creation.  This should be OK since a single backend can't
+	 * run multiple index creations in parallel.
+	 */
+	gspool->sortstate = tuplesort_begin_index_gist(heap,
+												   index,
+												   ssup,
+												   maintenance_work_mem,
+												   NULL,
+												   false);
+
+	return gspool;
+}
+
+/*
+ * Flush page contents to actual page at blockno
+ * We have expected block number, because GiST build relies on that pages
+ * will be allocated in continous segments. This simplifies allocation
+ * logic.
+ */
+static void
+gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew)
+{
+	OffsetNumber i,
+				maxoff;
+	Page newpage;
+
+	Buffer buffer = ReadBuffer(rel, isNew ? P_NEW : blockno);
+	GISTInitBuffer(buffer, GistPageIsLeaf(page) ? F_LEAF : 0);
+	/* If the page is new - check that it was allocated correctly */
+	Assert(BufferGetBlockNumber(buffer) == blockno);
+
+	LockBuffer(buffer, GIST_EXCLUSIVE);
+	newpage = BufferGetPage(buffer);
+
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		ItemId itemid = PageGetItemId(page, i);
+		IndexTuple tuple = (IndexTuple) PageGetItem(page, itemid);
+		gistfillbuffer(newpage,	&tuple,
+						1, InvalidOffsetNumber);
+	}
+	UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * given a spool loaded by successive calls to _h_spool,
+ * create an entire index.
+ */
+static void
+gist_indexsortbuild(GISTBuildState *state)
+{
+	GSpool *gspool = state->spool;
+	/*
+	 * Build constructs GiST by levels. Level is always allocated
+	 * sequentially from level_start until level_end.
+	 */
+	BlockNumber level_start = GIST_ROOT_BLKNO + 1;
+	BlockNumber level_end = level_start;
+	BlockNumber prev_level_start;
+	IndexTuple	itup;
+	/*
+	 * We keep one page in memory for the special case
+	 * When layer will have only on page - we will place it
+	 * to ROOT_BLOCK_NO
+	 */
+	Page page = palloc(BLCKSZ);
+	gistinitpage(page, F_LEAF);
+
+	tuplesort_performsort(gspool->sortstate);
+
+	/* Create a first layer of leaf pages */
+	while ((itup = tuplesort_getindextuple(gspool->sortstate, true)) != NULL)
+	{
+		if (PageGetFreeSpace(page) >= IndexTupleSize(itup) + sizeof(ItemIdData))
+		{
+			gistfillbuffer(page, &itup, 1, InvalidOffsetNumber);
+		}
+		else
+		{
+			gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+			level_end++;
+			gistinitpage(page, F_LEAF);
+			gistfillbuffer(page, &itup, 1, InvalidOffsetNumber);
+		}
+	}
+
+	/* Construct internal levels */
+	do
+	{
+		/* If previous level had only one page - that page is a root */
+		if (level_start == level_end)
+		{
+			gist_indexsortbuild_flush(state->indexrel, page, GIST_ROOT_BLKNO,
+										false);
+			return;
+		}
+
+		gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+		level_end++;
+		gistinitpage(page, 0);
+		
+		prev_level_start = level_start;
+		level_start = level_end;
+
+		for (BlockNumber i = prev_level_start; i < level_start; i++)
+		{
+			/* For each page on previous level we form one tuple */
+			Buffer lower_buffer = ReadBuffer(state->indexrel, i);
+			Page lower_page;
+			IndexTuple union_tuple;
+			MemoryContext oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
+			int vect_len;
+			LockBuffer(lower_buffer, GIST_SHARE);
+			lower_page = BufferGetPage(lower_buffer);
+
+			IndexTuple *itvec = gistextractpage(lower_page, &vect_len);
+			union_tuple = gistunion(state->indexrel, itvec, vect_len,
+									state->giststate);
+			ItemPointerSetBlockNumber(&(union_tuple->t_tid), i);
+
+			if (PageGetFreeSpace(page) >= IndexTupleSize(union_tuple) + sizeof(ItemIdData))
+			{
+				gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber);
+			}
+			else
+			{
+				gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+				level_end++;
+				gistinitpage(page, 0);
+				gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber);
+			}
+
+			UnlockReleaseBuffer(lower_buffer);
+			MemoryContextSwitchTo(oldCtx);
+			MemoryContextReset(state->giststate->tempCxt);
+		}
+	} while (true);
+}
+
+/*
+ * spool an index entry into the sort file.
+ */
+static void
+gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull)
+{
+	tuplesort_putindextuplevalues(gspool->sortstate, gspool->index,
+								  self, values, isnull);
+}
+
 /*
  * Main entry point to GiST index build. Initially calls insert over and over,
  * but switches to more efficient buffering build algorithm after a certain
@@ -129,7 +306,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	buildstate.indexrel = index;
 	buildstate.heaprel = heap;
-	buildstate.sortFn.fn_oid = InvalidOid;
+	buildstate.spool = NULL;
 	if (index->rd_options)
 	{
 		/* Get buffering mode from the options string */
@@ -144,13 +321,16 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 			buildstate.bufferingMode = GIST_BUFFERING_AUTO;
 
 		if (options->vl_len_ > offsetof(GiSTOptions, buildSortFunctionOffset) &&
-			options->bufferingModeOffset > 0)
+			options->buildSortFunctionOffset > 0)
 		{
 			char	*sortFuncName = (char *) options + options->buildSortFunctionOffset;
 			Oid		 argList[1] = {INTERNALOID};
 			List	*namelist = stringToQualifiedNameList(sortFuncName);
 			Oid sortFuncOid = LookupFuncName(namelist, 1, argList, false);
-			fmgr_info(sortFuncOid, &buildstate.sortFn);
+			SortSupport sort = palloc0(sizeof(SortSupportData));
+			OidFunctionCall1(sortFuncOid, PointerGetDatum(sort));
+			buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
+			buildstate.spool = gist_spoolinit(heap, index, sort);
 		}
 
 		fillfactor = options->fillfactor;
@@ -223,6 +403,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 		gistFreeBuildBuffers(buildstate.gfbb);
 	}
 
+	if (buildstate.spool)
+	{
+		gist_indexsortbuild(&buildstate);
+		tuplesort_end(buildstate.spool->sortstate);
+	}
+
 	/* okay, all heap tuples are indexed */
 	MemoryContextSwitchTo(oldcxt);
 	MemoryContextDelete(buildstate.giststate->tempCxt);
@@ -498,14 +684,20 @@ gistBuildCallback(Relation index,
 	GISTBuildState *buildstate = (GISTBuildState *) state;
 	IndexTuple	itup;
 	MemoryContext oldCtx;
+	Datum compressed_values[INDEX_MAX_KEYS];
 
 	oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
 
 	/* form an index tuple and point it at the heap tuple */
-	itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
+	itup = gistCompressValuesAndFormTuple(buildstate->giststate, index, values, isnull, true, compressed_values);
 	itup->t_tid = htup->t_self;
 
-	if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
+	if (buildstate->spool)
+	{
+		if (tupleIsAlive)
+			gist_spool(buildstate->spool, (ItemPointer)itup, compressed_values, isnull);
+	}
+	else if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
 	{
 		/* We have buffers, so use them. */
 		gistBufferingBuildInsert(buildstate, itup);
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 6e2ff95b5c..330ef3a2af 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -174,6 +174,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
 
 			datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
 								  &IsNull);
+
 			if (IsNull)
 				continue;
 
@@ -576,6 +577,13 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
 			  Datum attdata[], bool isnull[], bool isleaf)
 {
 	Datum		compatt[INDEX_MAX_KEYS];
+	return gistCompressValuesAndFormTuple(giststate, r, attdata, isnull, isleaf, compatt);
+}
+
+IndexTuple
+gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r,
+			  Datum attdata[], bool isnull[], bool isleaf, Datum compatt[])
+{
 	int			i;
 	IndexTuple	res;
 
@@ -746,14 +754,10 @@ gistpenalty(GISTSTATE *giststate, int attno,
  * Initialize a new index page
  */
 void
-GISTInitBuffer(Buffer b, uint32 f)
+gistinitpage(Page page, uint32 f)
 {
-	GISTPageOpaque opaque;
-	Page		page;
-	Size		pageSize;
-
-	pageSize = BufferGetPageSize(b);
-	page = BufferGetPage(b);
+	GISTPageOpaque	opaque;
+	Size			pageSize = BLCKSZ;
 	PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
 
 	opaque = GistPageGetOpaque(page);
@@ -764,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f)
 	opaque->gist_page_id = GIST_PAGE_ID;
 }
 
+/*
+ * Initialize a new index buffer
+ */
+void
+GISTInitBuffer(Buffer b, uint32 f)
+{
+	Page		page;
+
+	page = BufferGetPage(b);
+	gistinitpage(page, f);
+}
+
 /*
  * Verify that a freshly-read page looks sane.
  */
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 7b8e67899e..3249f3eb69 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -630,6 +630,8 @@ static int	comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 								   Tuplesortstate *state);
 static int	comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 								  Tuplesortstate *state);
+static int	comparetup_index_gist(const SortTuple *a, const SortTuple *b,
+								  Tuplesortstate *state);
 static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
 static void writetup_index(Tuplesortstate *state, int tapenum,
 						   SortTuple *stup);
@@ -1095,6 +1097,43 @@ tuplesort_begin_index_hash(Relation heapRel,
 	return state;
 }
 
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+						   Relation indexRel,
+						   SortSupport ssup,
+						   int workMem,
+						   SortCoordinate coordinate,
+						   bool randomAccess)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   randomAccess);
+	MemoryContext oldcontext;
+
+	oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: workMem = %d, randomAccess = %c",
+			 workMem, randomAccess ? 't' : 'f');
+#endif
+
+	state->nKeys = 1;			/* Only one sort column, the gist attribute */
+
+	state->comparetup = comparetup_index_gist;
+	state->copytup = copytup_index;
+	state->writetup = writetup_index;
+	state->readtup = readtup_index;
+	state->sortKeys = ssup;
+
+	state->heapRel = heapRel;
+	state->indexRel = indexRel;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return state;
+}
+
 Tuplesortstate *
 tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 					  bool nullsFirstFlag, int workMem,
@@ -4138,6 +4177,49 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 	return 0;
 }
 
+static int
+comparetup_index_gist(const SortTuple *a, const SortTuple *b,
+					  Tuplesortstate *state)
+{
+	SortSupport sortKey = state->sortKeys;
+	IndexTuple	tuple1;
+	IndexTuple	tuple2;
+	int32		compare;
+
+
+	/* Compare the leading sort key */
+	compare = ApplySortComparator(a->datum1, a->isnull1,
+								  b->datum1, b->isnull1,
+								  sortKey);
+	if (compare != 0)
+		return compare;
+
+	tuple1 = (IndexTuple) a->tuple;
+	tuple2 = (IndexTuple) b->tuple;
+	/*
+	 * If key values are equal, we sort on ItemPointer.
+	 */
+	{
+		BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
+		BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
+
+		if (blk1 != blk2)
+			return (blk1 < blk2) ? -1 : 1;
+	}
+	{
+		OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
+		OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
+
+		if (pos1 != pos2)
+			return (pos1 < pos2) ? -1 : 1;
+	}
+
+	/* ItemPointer values should never be equal */
+	Assert(false);
+
+	return 0;
+}
+
 static void
 copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index b4b6883f6d..ef9afdf1ca 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -487,12 +487,15 @@ extern IndexTuple gistgetadjusted(Relation r,
 								  GISTSTATE *giststate);
 extern IndexTuple gistFormTuple(GISTSTATE *giststate,
 								Relation r, Datum *attdata, bool *isnull, bool isleaf);
+extern IndexTuple gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r,
+							Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]);
 
 extern OffsetNumber gistchoose(Relation r, Page p,
 							   IndexTuple it,
 							   GISTSTATE *giststate);
 
 extern void GISTInitBuffer(Buffer b, uint32 f);
+extern void gistinitpage(Page page, uint32 f);
 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
 						   Datum k, Relation r, Page pg, OffsetNumber o,
 						   bool l, bool isNull);
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 4521de18e1..3f1ceac3d2 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -26,6 +26,7 @@
 #include "fmgr.h"
 #include "storage/dsm.h"
 #include "utils/relcache.h"
+#include "utils/sortsupport.h"
 
 
 /*
@@ -209,6 +210,11 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
 												  uint32 max_buckets,
 												  int workMem, SortCoordinate coordinate,
 												  bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
+												  Relation indexRel,
+												  SortSupport ssup,
+												  int workMem, SortCoordinate coordinate,
+												  bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
 											 Oid sortOperator, Oid sortCollation,
 											 bool nullsFirstFlag,
-- 
2.20.1

