On 12/16/2014 07:48 PM, Teodor Sigaev wrote:
/*
 * This struct is what we actually keep in index->rd_amcache.  It includes
 * static configuration information as well as the lastUsedPages cache.
 */
typedef struct SpGistCache
{
        spgConfigOut config;            /* filled in by opclass config method */

        SpGistTypeDesc attType;         /* type of input data and leaf values */
        SpGistTypeDesc attPrefixType;           /* type of inner-tuple prefix 
values */
        SpGistTypeDesc attLabelType;    /* type of node label values */

        SpGistLUPCache lastUsedPages;           /* local storage of last-used 
info */
} SpGistCache;

Now that the input data type and leaf data type can be different, which one is "attType"? It's the leaf data type, as the patch stands. I renamed that to attLeafType, and went fixing all the references to it. In most places it's just a matter of search & replace, but what about the reconstructed datum? In freeScanStackEntry, we assume that att[Leaf]Type is the datatype for reconstructedValue, but I believe assume elsewhere that reconstructedValue is of the same data type as the input. At least if the opclass supports index-only scans.

I think we'll need a separate SpGistTypeDesc for the input type. Or perhaps a separate SpGistTypeDesc for the reconstructed value and an optional decompress method to turn the reconstructedValue back into an actual reconstructed input datum. Or something like that.

Attached is a patch with the kibitzing I did so far.

- Heikki

diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index 56827e5..de158c3 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -201,20 +201,21 @@
 
  <para>
   There are five user-defined methods that an index operator class for
-  <acronym>SP-GiST</acronym> must provide.  All five follow the convention
-  of accepting two <type>internal</> arguments, the first of which is a
-  pointer to a C struct containing input values for the support method,
-  while the second argument is a pointer to a C struct where output values
-  must be placed.  Four of the methods just return <type>void</>, since
-  all their results appear in the output struct; but
+  <acronym>SP-GiST</acronym> must provide and one optional. All five mandatory
+  methos follow the convention of accepting two <type>internal</> arguments,
+  the first of which is a pointer to a C struct containing input values for 
+  the support method, while the second argument is a pointer to a C struct 
+  where output values must be placed.  Four of the methods just return 
+  <type>void</>, since all their results appear in the output struct; but
   <function>leaf_consistent</> additionally returns a <type>boolean</> result.
   The methods must not modify any fields of their input structs.  In all
   cases, the output struct is initialized to zeroes before calling the
-  user-defined method.
+  user-defined method. Optional method <function>compress</> accepts
+  datum to be indexed and returns values which actually will be indexed.  
  </para>
 
  <para>
-  The five user-defined methods are:
+  The five mandatory user-defined methods are:
  </para>
 
  <variablelist>
@@ -244,6 +245,7 @@ typedef struct spgConfigOut
 {
     Oid         prefixType;     /* Data type of inner-tuple prefixes */
     Oid         labelType;      /* Data type of inner-tuple node labels */
+    Oid         leafType;       /* Data type of leaf */
     bool        canReturnData;  /* Opclass can reconstruct original data */
     bool        longValuesOK;   /* Opclass can cope with values &gt; 1 page */
 } spgConfigOut;
@@ -264,7 +266,15 @@ typedef struct spgConfigOut
       <structfield>longValuesOK</> should be set true only when the
       <structfield>attType</> is of variable length and the operator
       class is capable of segmenting long values by repeated suffixing
-      (see <xref linkend="spgist-limits">).
+      (see <xref linkend="spgist-limits">). <structfield>leafType</>
+      usually has the same value as <structfield>attType</> but if
+      it's different then optional method  <function>compress</>
+      should be provided. Method  <function>compress</> is responsible
+      for transformation from <structfield>attType</> to 
+      <structfield>leafType</>. In this case all other function should
+      accept <structfield>leafType</> values. Note: both consistent
+      functions will get <structfield>scankeys</> unchanged, without
+      <function>compress</> transformation.
      </para>
      </listitem>
     </varlistentry>
@@ -690,6 +700,24 @@ typedef struct spgLeafConsistentOut
     </varlistentry>
    </variablelist>
 
+ <para>
+  The optional user-defined method is:
+ </para>
+
+ <variablelist>
+    <varlistentry>
+     <term><function>Datum compress(Datum in)</></term>
+     <listitem>
+      <para>
+       Converts the data item into a format suitable for physical storage in 
+       an index page. It accepts <structname>spgConfigIn</>.<structfield>attType</>
+       value and return <structname>spgConfigOut</>.<structfield>leafType</>
+       value. Output value should not be toasted.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+
   <para>
    All the SP-GiST support methods are normally called in a short-lived
    memory context; that is, <varname>CurrentMemoryContext</> will be reset
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 1a17cc4..06c0680 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -1854,21 +1854,36 @@ spgdoinsert(Relation index, SpGistState *state,
 	FmgrInfo   *procinfo = NULL;
 
 	/*
-	 * Look up FmgrInfo of the user-defined choose function once, to save
-	 * cycles in the loop below.
+	 * Prepare the leaf datum to insert.
+	 *
+	 * If there is an optional "compress" method, call it to form the leaf
+	 * datum from the input datum. Otherwise we will store the input datum as
+	 * is. (We have to detoast it, though. We assume the "compress" method to
+	 * return an untoasted value.)
 	 */
 	if (!isnull)
-		procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
+	{
+		if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+		{
+			procinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
+			leafDatum = FunctionCall1Coll(procinfo,
+										  index->rd_indcollation[0],
+										  datum);
+		}
+		else if (state->attLeafType.attlen == -1)
+			leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum));
+		else
+			leafDatum = datum;
+	}
+	else
+		leafDatum = (Datum) 0;
 
 	/*
-	 * Since we don't use index_form_tuple in this AM, we have to make sure
-	 * value to be inserted is not toasted; FormIndexDatum doesn't guarantee
-	 * that.
+	 * Look up FmgrInfo of the user-defined choose function once, to save
+	 * cycles in the loop below.
 	 */
-	if (!isnull && state->attType.attlen == -1)
-		datum = PointerGetDatum(PG_DETOAST_DATUM(datum));
-
-	leafDatum = datum;
+	if (!isnull)
+		procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
 
 	/*
 	 * Compute space needed for a leaf tuple containing the given datum.
@@ -1878,7 +1893,7 @@ spgdoinsert(Relation index, SpGistState *state,
 	 */
 	if (!isnull)
 		leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-			SpGistGetTypeSize(&state->attType, leafDatum);
+			SpGistGetTypeSize(&state->attLeafType, leafDatum);
 	else
 		leafSize = SGDTSIZE + sizeof(ItemIdData);
 
@@ -2093,7 +2108,7 @@ spgdoinsert(Relation index, SpGistState *state,
 					{
 						leafDatum = out.result.matchNode.restDatum;
 						leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-							SpGistGetTypeSize(&state->attType, leafDatum);
+							SpGistGetTypeSize(&state->attLeafType, leafDatum);
 					}
 
 					/*
diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index 35cc41b..a4c5592 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -39,7 +39,8 @@ typedef struct ScanStackEntry
 static void
 freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry)
 {
-	if (!so->state.attType.attbyval &&
+	/* FIXME: Is attLeafType correct for reconstructedValue? */
+	if (!so->state.attLeafType.attbyval &&
 		DatumGetPointer(stackEntry->reconstructedValue) != NULL)
 		pfree(DatumGetPointer(stackEntry->reconstructedValue));
 	pfree(stackEntry);
@@ -539,11 +540,14 @@ redirect:
 					else
 						newEntry->level = stackEntry->level;
 					/* Must copy value out of temp context */
+					/*
+					 * FIXME: this assumes that the leaf data type is the same
+					 * as the reconstructedValues datatype */
 					if (out.reconstructedValues)
 						newEntry->reconstructedValue =
 							datumCopy(out.reconstructedValues[i],
-									  so->state.attType.attbyval,
-									  so->state.attType.attlen);
+									  so->state.attLeafType.attbyval,
+									  so->state.attLeafType.attlen);
 					else
 						newEntry->reconstructedValue = (Datum) 0;
 
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 1a224ef..dcbb30c 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -74,7 +74,21 @@ spgGetCache(Relation index)
 						  PointerGetDatum(&cache->config));
 
 		/* Get the information we need about each relevant datatype */
-		fillTypeDesc(&cache->attType, atttype);
+		if (OidIsValid(cache->config.leafType) &&
+			cache->config.leafType != atttype)
+		{
+			if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("compress method must not defined when leaf type is different from input type")));
+
+			fillTypeDesc(&cache->attLeafType, cache->config.leafType);
+		}
+		else
+		{
+			fillTypeDesc(&cache->attLeafType, atttype);
+		}
+
 		fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
 		fillTypeDesc(&cache->attLabelType, cache->config.labelType);
 
@@ -113,7 +127,7 @@ initSpGistState(SpGistState *state, Relation index)
 	cache = spgGetCache(index);
 
 	state->config = cache->config;
-	state->attType = cache->attType;
+	state->attLeafType = cache->attLeafType;
 	state->attPrefixType = cache->attPrefixType;
 	state->attLabelType = cache->attLabelType;
 
@@ -556,7 +570,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
 	/* compute space needed (note result is already maxaligned) */
 	size = SGLTHDRSZ;
 	if (!isnull)
-		size += SpGistGetTypeSize(&state->attType, datum);
+		size += SpGistGetTypeSize(&state->attLeafType, datum);
 
 	/*
 	 * Ensure that we can replace the tuple with a dead tuple later.  This
@@ -572,7 +586,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
 	tup->nextOffset = InvalidOffsetNumber;
 	tup->heapPtr = *heapPtr;
 	if (!isnull)
-		memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
+		memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
 
 	return tup;
 }
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index 3aa96bd..bbf6d89 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -30,7 +30,8 @@
 #define SPGIST_PICKSPLIT_PROC			3
 #define SPGIST_INNER_CONSISTENT_PROC	4
 #define SPGIST_LEAF_CONSISTENT_PROC		5
-#define SPGISTNProc						5
+#define SPGIST_COMPRESS_PROC			6
+#define SPGISTNProc						6
 
 /*
  * Argument structs for spg_config method
@@ -44,6 +45,7 @@ typedef struct spgConfigOut
 {
 	Oid			prefixType;		/* Data type of inner-tuple prefixes */
 	Oid			labelType;		/* Data type of inner-tuple node labels */
+	Oid			leafType;		/* Data type of leaf (type of SPGIST_COMPRESS_PROC output) */
 	bool		canReturnData;	/* Opclass can reconstruct original data */
 	bool		longValuesOK;	/* Opclass can cope with values > 1 page */
 } spgConfigOut;
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 4b6fdee..8ae7e75 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -115,13 +115,13 @@ typedef struct SpGistTypeDesc
 
 typedef struct SpGistState
 {
-	spgConfigOut config;		/* filled in by opclass config method */
+	spgConfigOut config;			/* filled in by opclass config method */
 
-	SpGistTypeDesc attType;		/* type of input data and leaf values */
-	SpGistTypeDesc attPrefixType;		/* type of inner-tuple prefix values */
+	SpGistTypeDesc attLeafType;		/* type of leaf values */
+	SpGistTypeDesc attPrefixType;	/* type of inner-tuple prefix values */
 	SpGistTypeDesc attLabelType;	/* type of node label values */
 
-	char	   *deadTupleStorage;		/* workspace for spgFormDeadTuple */
+	char	   *deadTupleStorage;	/* workspace for spgFormDeadTuple */
 
 	TransactionId myXid;		/* XID to use when creating a redirect tuple */
 	bool		isBuild;		/* true if doing index build */
@@ -174,13 +174,13 @@ typedef SpGistScanOpaqueData *SpGistScanOpaque;
  */
 typedef struct SpGistCache
 {
-	spgConfigOut config;		/* filled in by opclass config method */
+	spgConfigOut config;			/* filled in by opclass config method */
 
-	SpGistTypeDesc attType;		/* type of input data and leaf values */
-	SpGistTypeDesc attPrefixType;		/* type of inner-tuple prefix values */
+	SpGistTypeDesc attLeafType;		/* type of leaf values */
+	SpGistTypeDesc attPrefixType;	/* type of inner-tuple prefix values */
 	SpGistTypeDesc attLabelType;	/* type of node label values */
 
-	SpGistLUPCache lastUsedPages;		/* local storage of last-used info */
+	SpGistLUPCache lastUsedPages;	/* local storage of last-used info */
 } SpGistCache;
 
 
@@ -298,7 +298,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple;
 
 #define SGLTHDRSZ			MAXALIGN(sizeof(SpGistLeafTupleData))
 #define SGLTDATAPTR(x)		(((char *) (x)) + SGLTHDRSZ)
-#define SGLTDATUM(x, s)		((s)->attType.attbyval ? \
+#define SGLTDATUM(x, s)		((s)->attLeafType.attbyval ? \
 							 *(Datum *) SGLTDATAPTR(x) : \
 							 PointerGetDatum(SGLTDATAPTR(x)))
 
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 67b57cd..6427552 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -129,7 +129,7 @@ DESCR("GiST index access method");
 DATA(insert OID = 2742 (  gin		0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions ));
 DESCR("GIN index access method");
 #define GIN_AM_OID 2742
-DATA(insert OID = 4000 (  spgist	0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
+DATA(insert OID = 4000 (  spgist	0 6 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
 DESCR("SP-GiST index access method");
 #define SPGIST_AM_OID 4000
 DATA(insert OID = 3580 (  brin	5 14 f f f f t t f t t f f 0 brininsert brinbeginscan - bringetbitmap brinrescan brinendscan brinmarkpos brinrestrpos brinbuild brinbuildempty brinbulkdelete brinvacuumcleanup - brincostestimate brinoptions ));
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to