Hi,

In
http://archives.postgresql.org/message-id/20130216164231.GA15069%40awork2.anarazel.de
I presented the need for 'indirect' toast tuples which point into memory
instead of a toast table. In the comments on that proposal, as well as in
off-list and in-person talks, the wish to make that a more general concept
has been voiced.

The previous patch used varattrib_1b_e.va_len_1be to distinguish between
different types of external tuples. That obviously only works if the
data sizes of all possibly stored datum types are distinct, which isn't
nice. So what the newer patch now does is rename that field to
'va_tag' and decide based on that what kind of Datum we have. To get the
actual length of that datum there now is a VARTAG_SIZE() macro which
maps the tags back to sizes.
To keep on-disk compatibility, the size of an external toast tuple
containing a varatt_external is used as its tag value.

This should allow for fairly easy development of a new compression
scheme for out-of-line toast tuples. It will *not* work for compressed
inline tuples (i.e. VARATT_4B_C). I am not convinced that that is a
problem or that if it is, that it cannot be solved separately.

FWIW, in some quick microbenchmarks I couldn't find any performance
difference due to the slightly more complex size computation which I do
*not* find surprising.

Opinions?

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From 43416ee71033a1bd12bec5e651ff45ea9eeafd56 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 17 Feb 2013 01:38:17 +0100
Subject: [PATCH] Add support for multiple kinds of external toast datums

There are several use cases where our current representation of external toast
datums is limiting:
* adding new compression schemes
* avoidance of repeated detoasting
* externally decoded toast tuples

To that end, support 'tags' on external (varattrib_1b_e) varlenas, which
repurpose the current va_len_1be field to store the tag (or type) of a varlena.
To determine the actual length, a macro VARTAG_SIZE(tag) is added which can be
used to map from a tag to the actual length.

This patch adds support for 'indirect' tuples which point to some externally
allocated memory containing a toast tuple. It also implements the stub for a
different compression algorithm.
---
 src/backend/access/heap/tuptoaster.c | 115 ++++++++++++++++++++++++++++++++---
 src/include/postgres.h               | 100 +++++++++++++++++++++++-------
 2 files changed, 183 insertions(+), 32 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index fc37ceb..46c7cf4 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -128,7 +128,7 @@ heap_tuple_fetch_attr(struct varlena * attr)
 struct varlena *
 heap_tuple_untoast_attr(struct varlena * attr)
 {
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_OLDSTYLE(attr))
 	{
 		/*
 		 * This is an externally stored datum --- fetch it back from there
@@ -145,6 +145,19 @@ heap_tuple_untoast_attr(struct varlena * attr)
 			pfree(tmp);
 		}
 	}
+	else if (VARATT_IS_EXTERNAL_COMPRESSED(attr))
+	{
+		elog(ERROR, "not yet");
+	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		attr = (struct varlena *)redirect.pointer;
+		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+		attr = heap_tuple_untoast_attr(attr);
+	}
 	else if (VARATT_IS_COMPRESSED(attr))
 	{
 		/*
@@ -191,7 +204,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 	char	   *attrdata;
 	int32		attrsize;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_OLDSTYLE(attr))
 	{
 		struct varatt_external toast_pointer;
 
@@ -204,6 +217,13 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 		/* fetch it back (compressed marker will get set automatically) */
 		preslice = toast_fetch_datum(attr);
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		return heap_tuple_untoast_attr_slice(redirect.pointer,
+											 sliceoffset, slicelength);
+	}
 	else
 		preslice = attr;
 
@@ -267,7 +287,7 @@ toast_raw_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_OLDSTYLE(attr))
 	{
 		/* va_rawsize is the size of the original datum -- including header */
 		struct varatt_external toast_pointer;
@@ -275,6 +295,17 @@ toast_raw_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_rawsize;
 	}
+	else if (VARATT_IS_EXTERNAL_COMPRESSED(attr))
+	{
+		elog(ERROR, "not yet");
+	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_COMPRESSED(attr))
 	{
 		/* here, va_rawsize is just the payload size */
@@ -308,7 +339,7 @@ toast_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_OLDSTYLE(attr))
 	{
 		/*
 		 * Attribute is stored externally - return the extsize whether
@@ -320,6 +351,17 @@ toast_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_extsize;
 	}
+	else if (VARATT_IS_EXTERNAL_COMPRESSED(attr))
+	{
+		elog(ERROR, "not yet");
+	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_SHORT(attr))
 	{
 		result = VARSIZE_SHORT(attr);
@@ -387,12 +429,56 @@ toast_delete(Relation rel, HeapTuple oldtup)
 		{
 			Datum		value = toast_values[i];
 
-			if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value)))
+			if (toast_isnull[i])
+				continue;
+			else if (VARATT_IS_EXTERNAL_OLDSTYLE(PointerGetDatum(value)))
 				toast_delete_datum(rel, value);
+			else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
+				elog(ERROR, "cannot delete tuples with indirect toast tuples for now");
 		}
 	}
 }
 
+/* ----------
+ * toast_datum_differs -
+ *
+ *  Return true if two external toast datums hold different data, so the
+ *  new value has to be stored again; return false if they are the same.
+ * ----------
+ */
+static bool
+toast_datum_differs(struct varlena *old_value, struct varlena *new_value)
+{
+	Assert(VARATT_IS_EXTERNAL(old_value));
+	Assert(VARATT_IS_EXTERNAL(new_value));
+
+	/* fast path for the common case where we have the toast oid available */
+	if (VARATT_IS_EXTERNAL_OLDSTYLE(old_value) &&
+		VARATT_IS_EXTERNAL_OLDSTYLE(new_value))
+		return memcmp((char *) old_value, (char *) new_value,
+					  VARSIZE_EXTERNAL(old_value)) != 0;
+
+	/*
+	 * compare size of tuples, so we don't uselessly detoast/decompress tuples
+	 * if they can't be the same anyway: unequal raw sizes mean they differ.
+	 */
+	if (toast_raw_datum_size(PointerGetDatum(old_value)) !=
+		toast_raw_datum_size(PointerGetDatum(new_value)))
+		return true;
+
+	old_value = heap_tuple_untoast_attr(old_value);
+	new_value = heap_tuple_untoast_attr(new_value);
+
+	Assert(!VARATT_IS_EXTERNAL(old_value));
+	Assert(!VARATT_IS_EXTERNAL(new_value));
+	Assert(!VARATT_IS_COMPRESSED(old_value));
+	Assert(!VARATT_IS_COMPRESSED(new_value));
+	Assert(VARSIZE_ANY_EXHDR(old_value) == VARSIZE_ANY_EXHDR(new_value));
+
+	/* compare payload, we're fine with unaligned data */
+	return memcmp(VARDATA_ANY(old_value), VARDATA_ANY(new_value),
+				  VARSIZE_ANY_EXHDR(old_value)) != 0;
+}
 
 /* ----------
  * toast_insert_or_update -
@@ -497,8 +583,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				VARATT_IS_EXTERNAL(old_value))
 			{
 				if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
-					memcmp((char *) old_value, (char *) new_value,
-						   VARSIZE_EXTERNAL(old_value)) != 0)
+					toast_datum_differs(old_value, new_value))
 				{
 					/*
 					 * The old external stored value isn't needed any more
@@ -1258,6 +1343,8 @@ toast_save_datum(Relation rel, Datum value,
 	int32		data_todo;
 	Pointer		dval = DatumGetPointer(value);
 
+	Assert(!VARATT_IS_EXTERNAL(value));
+
 	/*
 	 * Open the toast relation and its index.  We can use the index to check
 	 * uniqueness of the OID we assign to the toasted item, even though it has
@@ -1341,7 +1428,7 @@ toast_save_datum(Relation rel, Datum value,
 		{
 			struct varatt_external old_toast_pointer;
 
-			Assert(VARATT_IS_EXTERNAL(oldexternal));
+			Assert(VARATT_IS_EXTERNAL_OLDSTYLE(oldexternal));
 			/* Must copy to access aligned fields */
 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -1456,7 +1543,7 @@ toast_save_datum(Relation rel, Datum value,
 	 * Create the TOAST pointer value that we'll return
 	 */
 	result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
-	SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE);
+	SET_VARTAG_EXTERNAL(result, VARTAG_OLDSTYLE);
 	memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
 
 	return PointerGetDatum(result);
@@ -1483,6 +1570,8 @@ toast_delete_datum(Relation rel, Datum value)
 	if (!VARATT_IS_EXTERNAL(attr))
 		return;
 
+	Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1608,6 +1697,12 @@ toast_fetch_datum(struct varlena * attr)
 	char	   *chunkdata;
 	int32		chunksize;
 
+	if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+		elog(ERROR, "shouldn't be called this way");
+
+	if (VARATT_IS_EXTERNAL_COMPRESSED(attr))
+		elog(ERROR, "not yet");
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1775,7 +1870,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 	int32		chcpystrt;
 	int32		chcpyend;
 
-	Assert(VARATT_IS_EXTERNAL(attr));
+	Assert(VARATT_IS_EXTERNAL_OLDSTYLE(attr));
 
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/include/postgres.h b/src/include/postgres.h
index f9c5527..c6388a7 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -54,23 +54,68 @@
  */
 
 /*
- * struct varatt_external is a "TOAST pointer", that is, the information
- * needed to fetch a stored-out-of-line Datum.	The data is compressed
- * if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct must not
- * contain any padding, because we sometimes compare pointers using memcmp.
+ * struct varatt_external is a "TOAST pointer", that is, the information needed
+ * to fetch a Datum that is stored out-of-line on disk. The data is
+ * compressed if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct
+ * must not contain any padding, because we sometimes compare pointers using
+ * memcmp.
  *
  * Note that this information is stored unaligned within actual tuples, so
  * you need to memcpy from the tuple into a local struct variable before
  * you can look at these fields!  (The reason we use memcmp is to avoid
  * having to do that just to detect equality of two TOAST pointers...)
  */
-struct varatt_external
+typedef struct varatt_external
 {
 	int32		va_rawsize;		/* Original data size (includes header) */
 	int32		va_extsize;		/* External saved size (doesn't) */
 	Oid			va_valueid;		/* Unique ID of value within TOAST table */
 	Oid			va_toastrelid;	/* RelID of TOAST table containing it */
-};
+} varatt_external;
+
+/*
+ * XXX: describe
+ *
+ * Customizable compression
+ */
+typedef struct varatt_compressed
+{
+	int32		va_rawsize;		/* Original data size (includes header) */
+	int32		va_extsize;		/* External saved size (doesn't) */
+	Oid			va_valueid;		/* Unique ID of value within TOAST table */
+	Oid			va_toastrelid;	/* RelID of TOAST table containing it */
+	uint32		va_method;
+} varatt_compressed;
+
+/*
+ * Out-of-line Datum that's stored in memory, in contrast to varatt_external
+ * pointers which point to data in an external toast relation.
+ *
+ * Note that, just like varatt_external, this is stored unaligned within the
+ * tuple.
+ */
+typedef struct varatt_indirect
+{
+	struct varlena *pointer;	/* Pointer to in-memory varlena */
+} varatt_indirect;
+
+
+/*
+ * Type of external toast datum stored. The peculiar value for VARTAG_OLDSTYLE
+ * comes from the requirement for on-disk compatibility with the older
+ * definitions of varattrib_1b_e where va_tag was named va_len_1be...
+ */
+typedef enum vartag_external {
+	VARTAG_COMPRESSED = 1,
+	VARTAG_INDIRECT = 4,
+	VARTAG_OLDSTYLE = 18
+} vartag_external;
+
+#define VARTAG_SIZE(tag) \
+	((tag) == VARTAG_COMPRESSED ? sizeof(varatt_compressed) :	\
+	 (tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) :		\
+	 (tag) == VARTAG_OLDSTYLE ? sizeof(varatt_external) : \
+	 TrapMacro(true, "unknown vartag"))	/* traps when condition is true */
 
 /*
  * These structs describe the header of a varlena object that may have been
@@ -105,8 +150,8 @@ typedef struct
 typedef struct
 {
 	uint8		va_header;		/* Always 0x80 or 0x01 */
-	uint8		va_len_1be;		/* Physical length of datum */
-	char		va_data[1];		/* Data (for now always a TOAST pointer) */
+	uint8		va_tag;			/* Type of datum */
+	char		va_data[1];		/* Data (of the type indicated by va_tag) */
 } varattrib_1b_e;
 
 /*
@@ -130,6 +175,9 @@ typedef struct
  * first byte.	Also, it is not possible for a 1-byte length word to be zero;
  * this lets us disambiguate alignment padding bytes from the start of an
  * unaligned datum.  (We now *require* pad bytes to be filled with zero!)
+ *
+ * In TOAST datums the tag field in varattrib_1b_e is used to discern
+ * whether it's an indirection pointer or, more commonly, an on-disk tuple.
  */
 
 /*
@@ -161,8 +209,8 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF)
 #define VARSIZE_1B(PTR) \
 	(((varattrib_1b *) (PTR))->va_header & 0x7F)
-#define VARSIZE_1B_E(PTR) \
-	(((varattrib_1b_e *) (PTR))->va_len_1be)
+#define VARTAG_1B_E(PTR) \
+	(((varattrib_1b_e *) (PTR))->va_tag)
 
 #define SET_VARSIZE_4B(PTR,len) \
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF)
@@ -170,9 +218,9 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000)
 #define SET_VARSIZE_1B(PTR,len) \
 	(((varattrib_1b *) (PTR))->va_header = (len) | 0x80)
-#define SET_VARSIZE_1B_E(PTR,len) \
+#define SET_VARTAG_1B_E(PTR,tag) \
 	(((varattrib_1b_e *) (PTR))->va_header = 0x80, \
-	 ((varattrib_1b_e *) (PTR))->va_len_1be = (len))
+	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))
 #else							/* !WORDS_BIGENDIAN */
 
 #define VARATT_IS_4B(PTR) \
@@ -193,8 +241,8 @@ typedef struct
 	((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF)
 #define VARSIZE_1B(PTR) \
 	((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F)
-#define VARSIZE_1B_E(PTR) \
-	(((varattrib_1b_e *) (PTR))->va_len_1be)
+#define VARTAG_1B_E(PTR) \
+	(((varattrib_1b_e *) (PTR))->va_tag)
 
 #define SET_VARSIZE_4B(PTR,len) \
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2))
@@ -202,12 +250,12 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02)
 #define SET_VARSIZE_1B(PTR,len) \
 	(((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01)
-#define SET_VARSIZE_1B_E(PTR,len) \
+#define SET_VARTAG_1B_E(PTR,tag) \
 	(((varattrib_1b_e *) (PTR))->va_header = 0x01, \
-	 ((varattrib_1b_e *) (PTR))->va_len_1be = (len))
+	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))
 #endif   /* WORDS_BIGENDIAN */
 
-#define VARHDRSZ_SHORT			1
+#define VARHDRSZ_SHORT			offsetof(varattrib_1b, va_data)
 #define VARATT_SHORT_MAX		0x7F
 #define VARATT_CAN_MAKE_SHORT(PTR) \
 	(VARATT_IS_4B_U(PTR) && \
@@ -215,7 +263,7 @@ typedef struct
 #define VARATT_CONVERTED_SHORT_SIZE(PTR) \
 	(VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT)
 
-#define VARHDRSZ_EXTERNAL		2
+#define VARHDRSZ_EXTERNAL		offsetof(varattrib_1b_e, va_data)
 
 #define VARDATA_4B(PTR)		(((varattrib_4b *) (PTR))->va_4byte.va_data)
 #define VARDATA_4B_C(PTR)	(((varattrib_4b *) (PTR))->va_compressed.va_data)
@@ -249,26 +297,34 @@ typedef struct
 #define VARSIZE_SHORT(PTR)					VARSIZE_1B(PTR)
 #define VARDATA_SHORT(PTR)					VARDATA_1B(PTR)
 
-#define VARSIZE_EXTERNAL(PTR)				VARSIZE_1B_E(PTR)
+#define VARTAG_EXTERNAL(PTR)				VARTAG_1B_E(PTR)
+#define VARSIZE_EXTERNAL(PTR)				(VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
 #define VARDATA_EXTERNAL(PTR)				VARDATA_1B_E(PTR)
 
 #define VARATT_IS_COMPRESSED(PTR)			VARATT_IS_4B_C(PTR)
 #define VARATT_IS_EXTERNAL(PTR)				VARATT_IS_1B_E(PTR)
+#define VARATT_IS_EXTERNAL_OLDSTYLE(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_OLDSTYLE)
+#define VARATT_IS_EXTERNAL_COMPRESSED(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_COMPRESSED)
+#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT)
 #define VARATT_IS_SHORT(PTR)				VARATT_IS_1B(PTR)
 #define VARATT_IS_EXTENDED(PTR)				(!VARATT_IS_4B_U(PTR))
 
 #define SET_VARSIZE(PTR, len)				SET_VARSIZE_4B(PTR, len)
 #define SET_VARSIZE_SHORT(PTR, len)			SET_VARSIZE_1B(PTR, len)
 #define SET_VARSIZE_COMPRESSED(PTR, len)	SET_VARSIZE_4B_C(PTR, len)
-#define SET_VARSIZE_EXTERNAL(PTR, len)		SET_VARSIZE_1B_E(PTR, len)
+
+#define SET_VARTAG_EXTERNAL(PTR, tag)		SET_VARTAG_1B_E(PTR, tag)
 
 #define VARSIZE_ANY(PTR) \
-	(VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR) : \
+	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \
 	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \
 	  VARSIZE_4B(PTR)))
 
 #define VARSIZE_ANY_EXHDR(PTR) \
-	(VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR)-VARHDRSZ_EXTERNAL : \
+	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \
 	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \
 	  VARSIZE_4B(PTR)-VARHDRSZ))
 
-- 
1.8.2.rc2.4.g7799588.dirty

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to