From ca02781294be9b943ab1492fc2cf4b8b5b852244 Mon Sep 17 00:00:00 2001
From: Andrey <amborodin@acm.org>
Date: Sun, 25 Aug 2019 14:18:31 +0500
Subject: [PATCH 2/2] Implement GiST build using sort support

---
 src/backend/access/gist/gistbuild.c    | 219 ++++++++++++++++++++++++-
 src/backend/access/gist/gistutil.c     |  30 +++-
 src/backend/access/gist/gistvalidate.c |   7 +-
 src/backend/utils/sort/tuplesort.c     |  37 +++++
 src/include/access/gist.h              |   3 +-
 src/include/access/gist_private.h      |   3 +
 src/include/catalog/pg_amproc.dat      |   2 +
 src/include/utils/tuplesort.h          |   6 +
 8 files changed, 295 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index ecef0ff072..8a9af71816 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -28,6 +28,8 @@
 #include "storage/smgr.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/regproc.h"
+#include "utils/tuplesort.h"
 
 /* Step of index tuples for check whether to switch to buffering build mode */
 #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
@@ -53,6 +55,15 @@ typedef enum
 	GIST_BUFFERING_ACTIVE		/* in buffering build mode */
 } GistBufferingMode;
 
+/*
+ * Status record for spooling/sorting phase.
+ */
+typedef struct
+{
+	Tuplesortstate *sortstate;	/* state data for tuplesort.c */
+	Relation	index;
+} GSpool;
+
 /* Working state for gistbuild and its callback */
 typedef struct
 {
@@ -74,6 +85,7 @@ typedef struct
 	HTAB	   *parentMap;
 
 	GistBufferingMode bufferingMode;
+	GSpool *spool;
 } GISTBuildState;
 
 /* prototypes for private functions */
@@ -107,6 +119,173 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
 static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
 static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
 
+static GSpool *gist_spoolinit(Relation heap, Relation index, SortSupport ssup);
+static void gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull);
+static void gist_indexsortbuild(GISTBuildState *state);
+static void gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew);
+
+/*
+ * create and initialize a spool structure to sort tuples
+ */
+static GSpool *
+gist_spoolinit(Relation heap, Relation index, SortSupport ssup)
+{
+	GSpool	   *gspool = (GSpool *) palloc0(sizeof(GSpool));
+
+	gspool->index = index;
+
+	/*
+	 * We size the sort area as maintenance_work_mem rather than work_mem to
+	 * speed index creation.  This should be OK since a single backend can't
+	 * run multiple index creations in parallel.
+	 */
+	gspool->sortstate = tuplesort_begin_index_gist(heap,
+												   index,
+												   ssup,
+												   maintenance_work_mem,
+												   NULL,
+												   false);
+
+	return gspool;
+}
+
+/*
+ * Flush page contents to actual page at blockno
+ * We have expected block number, because GiST build relies on that pages
+ * will be allocated in continous segments. This simplifies allocation
+ * logic.
+ */
+static void
+gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew)
+{
+	OffsetNumber i,
+				maxoff;
+	Page newpage;
+
+	Buffer buffer = ReadBuffer(rel, isNew ? P_NEW : blockno);
+	GISTInitBuffer(buffer, GistPageIsLeaf(page) ? F_LEAF : 0);
+	/* If the page is new - check that it was allocated correctly */
+	Assert(BufferGetBlockNumber(buffer) == blockno);
+
+	LockBuffer(buffer, GIST_EXCLUSIVE);
+	newpage = BufferGetPage(buffer);
+
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		ItemId itemid = PageGetItemId(page, i);
+		IndexTuple tuple = (IndexTuple) PageGetItem(page, itemid);
+		gistfillbuffer(newpage,	&tuple,
+						1, InvalidOffsetNumber);
+	}
+	UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * given a spool loaded by successive calls to _h_spool,
+ * create an entire index.
+ */
+static void
+gist_indexsortbuild(GISTBuildState *state)
+{
+	GSpool *gspool = state->spool;
+	/*
+	 * Build constructs GiST by levels. Level is always allocated
+	 * sequentially from level_start until level_end.
+	 */
+	BlockNumber level_start = GIST_ROOT_BLKNO + 1;
+	BlockNumber level_end = level_start;
+	BlockNumber prev_level_start;
+	IndexTuple	itup;
+	/*
+	 * We keep one page in memory for the special case
+	 * When layer will have only on page - we will place it
+	 * to ROOT_BLOCK_NO
+	 */
+	Page page = palloc(BLCKSZ);
+	gistinitpage(page, F_LEAF);
+
+	tuplesort_performsort(gspool->sortstate);
+
+	/* Create a first layer of leaf pages */
+	while ((itup = tuplesort_getindextuple(gspool->sortstate, true)) != NULL)
+	{
+		if (PageGetFreeSpace(page) >= IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace)
+		{
+			gistfillbuffer(page, &itup, 1, InvalidOffsetNumber);
+		}
+		else
+		{
+			gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+			level_end++;
+			gistinitpage(page, F_LEAF);
+			gistfillbuffer(page, &itup, 1, InvalidOffsetNumber);
+		}
+	}
+
+	/* Construct internal levels */
+	do
+	{
+		/* If previous level had only one page - that page is a root */
+		if (level_start == level_end)
+		{
+			gist_indexsortbuild_flush(state->indexrel, page, GIST_ROOT_BLKNO,
+										false);
+			return;
+		}
+
+		gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+		level_end++;
+		gistinitpage(page, 0);
+		
+		prev_level_start = level_start;
+		level_start = level_end;
+
+		for (BlockNumber i = prev_level_start; i < level_start; i++)
+		{
+			/* For each page on previous level we form one tuple */
+			Buffer lower_buffer = ReadBuffer(state->indexrel, i);
+			Page lower_page;
+			IndexTuple union_tuple;
+			MemoryContext oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
+			int vect_len;
+			LockBuffer(lower_buffer, GIST_SHARE);
+			lower_page = BufferGetPage(lower_buffer);
+
+			IndexTuple *itvec = gistextractpage(lower_page, &vect_len);
+			union_tuple = gistunion(state->indexrel, itvec, vect_len,
+									state->giststate);
+			ItemPointerSetBlockNumber(&(union_tuple->t_tid), i);
+
+			if (PageGetFreeSpace(page) >= IndexTupleSize(union_tuple) + sizeof(ItemIdData) + state->freespace)
+			{
+				gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber);
+			}
+			else
+			{
+				gist_indexsortbuild_flush(state->indexrel, page, level_end, true);
+				level_end++;
+				gistinitpage(page, 0);
+				gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber);
+			}
+
+			UnlockReleaseBuffer(lower_buffer);
+			MemoryContextSwitchTo(oldCtx);
+			MemoryContextReset(state->giststate->tempCxt);
+		}
+	} while (true);
+}
+
+/*
+ * spool an index entry into the sort file.
+ */
+static void
+gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull)
+{
+	tuplesort_putindextuplevalues(gspool->sortstate, gspool->index,
+								  self, values, isnull);
+}
+
 /*
  * Main entry point to GiST index build. Initially calls insert over and over,
  * but switches to more efficient buffering build algorithm after a certain
@@ -121,10 +300,14 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	Buffer		buffer;
 	Page		page;
 	MemoryContext oldcxt = CurrentMemoryContext;
-	int			fillfactor;
+	int			fillfactor, i;
+	Oid			SortSupportFnOids[INDEX_MAX_KEYS];
+	bool		hasallsortsupports = true;
+	int			keyscount = IndexRelationGetNumberOfKeyAttributes(index);
 
 	buildstate.indexrel = index;
 	buildstate.heaprel = heap;
+	buildstate.spool = NULL;
 	if (index->rd_options)
 	{
 		/* Get buffering mode from the options string */
@@ -152,6 +335,24 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	/* Calculate target amount of free space to leave on pages */
 	buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
 
+	for (i = 0; i < keyscount; i++)
+	{
+		SortSupportFnOids[i] = index_getprocid(index, i + 1, GIST_SORTSUPPORT_PROC);
+		if (!OidIsValid(SortSupportFnOids[i]))
+		{
+			hasallsortsupports = false;
+			break;
+		}
+	}
+	if (hasallsortsupports)
+	{
+			SortSupport sort = palloc0(sizeof(SortSupportData) * keyscount);
+			for (i = 0; i < keyscount; i++)
+				OidFunctionCall1(SortSupportFnOids[i], PointerGetDatum(sort + i));
+			buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
+			buildstate.spool = gist_spoolinit(heap, index, sort);
+	}
+
 	/*
 	 * We expect to be called exactly once for any index relation. If that's
 	 * not the case, big trouble's what we have.
@@ -208,6 +409,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 		gistFreeBuildBuffers(buildstate.gfbb);
 	}
 
+	if (buildstate.spool)
+	{
+		gist_indexsortbuild(&buildstate);
+		tuplesort_end(buildstate.spool->sortstate);
+	}
+
 	/* okay, all heap tuples are indexed */
 	MemoryContextSwitchTo(oldcxt);
 	MemoryContextDelete(buildstate.giststate->tempCxt);
@@ -468,14 +675,20 @@ gistBuildCallback(Relation index,
 	GISTBuildState *buildstate = (GISTBuildState *) state;
 	IndexTuple	itup;
 	MemoryContext oldCtx;
+	Datum compressed_values[INDEX_MAX_KEYS];
 
 	oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
 
 	/* form an index tuple and point it at the heap tuple */
-	itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
+	itup = gistCompressValuesAndFormTuple(buildstate->giststate, index, values, isnull, true, compressed_values);
 	itup->t_tid = htup->t_self;
 
-	if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
+	if (buildstate->spool)
+	{
+		if (tupleIsAlive)
+			gist_spool(buildstate->spool, (ItemPointer)itup, compressed_values, isnull);
+	}
+	else if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
 	{
 		/* We have buffers, so use them. */
 		gistBufferingBuildInsert(buildstate, itup);
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 49df05653b..57fc479086 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -174,6 +174,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
 
 			datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
 								  &IsNull);
+
 			if (IsNull)
 				continue;
 
@@ -576,6 +577,13 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
 			  Datum attdata[], bool isnull[], bool isleaf)
 {
 	Datum		compatt[INDEX_MAX_KEYS];
+	return gistCompressValuesAndFormTuple(giststate, r, attdata, isnull, isleaf, compatt);
+}
+
+IndexTuple
+gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r,
+			  Datum attdata[], bool isnull[], bool isleaf, Datum compatt[])
+{
 	int			i;
 	IndexTuple	res;
 
@@ -746,14 +754,10 @@ gistpenalty(GISTSTATE *giststate, int attno,
  * Initialize a new index page
  */
 void
-GISTInitBuffer(Buffer b, uint32 f)
+gistinitpage(Page page, uint32 f)
 {
-	GISTPageOpaque opaque;
-	Page		page;
-	Size		pageSize;
-
-	pageSize = BufferGetPageSize(b);
-	page = BufferGetPage(b);
+	GISTPageOpaque	opaque;
+	Size			pageSize = BLCKSZ;
 	PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
 
 	opaque = GistPageGetOpaque(page);
@@ -764,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f)
 	opaque->gist_page_id = GIST_PAGE_ID;
 }
 
+/*
+ * Initialize a new index buffer
+ */
+void
+GISTInitBuffer(Buffer b, uint32 f)
+{
+	Page		page;
+
+	page = BufferGetPage(b);
+	gistinitpage(page, f);
+}
+
 /*
  * Verify that a freshly-read page looks sane.
  */
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c
index dfc1a87a75..bc65b9c6bf 100644
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -140,6 +140,10 @@ gistvalidate(Oid opclassoid)
 											5, 5, INTERNALOID, opcintype,
 											INT2OID, OIDOID, INTERNALOID);
 				break;
+			case GIST_SORTSUPPORT_PROC:
+				ok = check_amproc_signature(procform->amproc, VOIDOID, true,
+											1, 1, INTERNALOID);
+				break;
 			default:
 				ereport(INFO,
 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -259,7 +263,8 @@ gistvalidate(Oid opclassoid)
 			(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
 			continue;			/* got it */
 		if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
-			i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC)
+			i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
+			i == GIST_SORTSUPPORT_PROC)
 			continue;			/* optional methods */
 		ereport(INFO,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 7b8e67899e..018901b654 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1095,6 +1095,43 @@ tuplesort_begin_index_hash(Relation heapRel,
 	return state;
 }
 
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+						   Relation indexRel,
+						   SortSupport ssup,
+						   int workMem,
+						   SortCoordinate coordinate,
+						   bool randomAccess)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   randomAccess);
+	MemoryContext oldcontext;
+
+	oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: workMem = %d, randomAccess = %c",
+			 workMem, randomAccess ? 't' : 'f');
+#endif
+
+	state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+	state->comparetup = comparetup_index_btree;
+	state->copytup = copytup_index;
+	state->writetup = writetup_index;
+	state->readtup = readtup_index;
+	state->sortKeys = ssup;
+
+	state->heapRel = heapRel;
+	state->indexRel = indexRel;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return state;
+}
+
 Tuplesortstate *
 tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 					  bool nullsFirstFlag, int workMem,
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 6902f4115b..eeea53a8f1 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -34,7 +34,8 @@
 #define GIST_EQUAL_PROC					7
 #define GIST_DISTANCE_PROC				8
 #define GIST_FETCH_PROC					9
-#define GISTNProcs					9
+#define GIST_SORTSUPPORT_PROC			10
+#define GISTNProcs					10
 
 /*
  * Page opaque data in a GiST index page.
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index f80694bf9a..b7b6ca13ac 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -486,12 +486,15 @@ extern IndexTuple gistgetadjusted(Relation r,
 								  GISTSTATE *giststate);
 extern IndexTuple gistFormTuple(GISTSTATE *giststate,
 								Relation r, Datum *attdata, bool *isnull, bool isleaf);
+extern IndexTuple gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r,
+							Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]);
 
 extern OffsetNumber gistchoose(Relation r, Page p,
 							   IndexTuple it,
 							   GISTSTATE *giststate);
 
 extern void GISTInitBuffer(Buffer b, uint32 f);
+extern void gistinitpage(Page page, uint32 f);
 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
 						   Datum k, Relation r, Page pg, OffsetNumber o,
 						   bool l, bool isNull);
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index 020b7413cc..1a52302f7e 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -409,6 +409,8 @@
   amproc => 'gist_point_distance' },
 { amprocfamily => 'gist/point_ops', amproclefttype => 'point',
   amprocrighttype => 'point', amprocnum => '9', amproc => 'gist_point_fetch' },
+{ amprocfamily => 'gist/point_ops', amproclefttype => 'point',
+  amprocrighttype => 'point', amprocnum => '10', amproc => 'gist_point_sortsupport' },
 { amprocfamily => 'gist/box_ops', amproclefttype => 'box',
   amprocrighttype => 'box', amprocnum => '1', amproc => 'gist_box_consistent' },
 { amprocfamily => 'gist/box_ops', amproclefttype => 'box',
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 4521de18e1..3f1ceac3d2 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -26,6 +26,7 @@
 #include "fmgr.h"
 #include "storage/dsm.h"
 #include "utils/relcache.h"
+#include "utils/sortsupport.h"
 
 
 /*
@@ -209,6 +210,11 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
 												  uint32 max_buckets,
 												  int workMem, SortCoordinate coordinate,
 												  bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
+												  Relation indexRel,
+												  SortSupport ssup,
+												  int workMem, SortCoordinate coordinate,
+												  bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
 											 Oid sortOperator, Oid sortCollation,
 											 bool nullsFirstFlag,
-- 
2.20.1

