Hi,

During logical decoding toast tuples are decoded separately from the
main tuple. Works nicely. To make the main table's HeapTuple actually
easily useable the tuple needs to be "reconstructed" to not point to
disk anymore but to the separately reconstructed tuples.

There are two ways to do this:
a) build a new HeapTuple that contains all formerly toasted datums
   inline (i.e. !VARATT_IS_EXTERNAL)
b) add support for external toast tuples that point into separately
   allocated memory instead of a toast table

a) has the problem that that the flattened HeapTuple can be bigger than
our maximal allocation size which seems like an awkward restriction...

Given that there have been wishes to support something like b) for quite
some time, independent from logical decoding, it seems like a good idea
to add support for it. Its e.g. useful for avoiding repeated detoasting
or decompression of tuples.

The problem with b) is that there is no space in varlena's flag bits to
directly denote that a varlena points into memory instead of either
directly containing the data or a varattrib_1b_e containing a
varatt_external pointing to an on-disk toasted tuple.

I propose extending the EXTERNAL varlenas to be able to point to memory
instead just to disk. It seem apt to use EXTERNAL for this as they
aren't stored in the normal heap tuple but somewhere else.
Unfortunately there is no backward-compatible flag space in
varattrib_1b_e either to nicely denote this and we sure don't want to
break on-disk compatibility for this. Thus I propose to distinguish
on-disk and in-memory tuples via the varattrib_1b_e.va_len_1be.

The attached (RFC, not fully ready!) patch adds the following stuff to
the public interfaces:
typedef struct varatt_indirect
{
        struct varlena *pointer;        /* Pointer to in-memory varlena */
} varatt_indirect;

...

#define VARATT_IS_EXTERNAL(PTR)                         VARATT_IS_1B_E(PTR)
#define VARATT_IS_EXTERNAL_TOAST(PTR) \
        (VARATT_IS_EXTERNAL(PTR) && VARSIZE_EXTERNAL(PTR) == TOAST_POINTER_SIZE)
#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
        (VARATT_IS_EXTERNAL(PTR) && VARSIZE_EXTERNAL(PTR) == 
INDIRECT_POINTER_SIZE)

I don't like to make the distinction through the size but I don't have a
better idea. And hey, its toast/varlena stuff, who expects cleanliness ;)

Existing code doesn't need to care whether a EXTERNAL datum is TOAST or
INDIRECT, that's handled transparently in tuptoaster.c. All EXTERNAL
tuples need to go through there anyway, so that seems fine.

Currently toast_fetch_datum() in tuptoaster.c does part of the gruntwork
for this as it was the easiest location but I think it might be better
to spread the work to some more callsites of it for clarity's sake.

Opinions?

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 49f1553..613040a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -44,9 +44,6 @@
 
 #undef TOAST_DEBUG
 
-/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
-#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
-
 /*
  * Testing whether an externally-stored value is compressed now requires
  * comparing extsize (the actual length of the external data) to rawsize
@@ -191,7 +188,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 	char	   *attrdata;
 	int32		attrsize;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_TOAST(attr))
 	{
 		struct varatt_external toast_pointer;
 
@@ -204,6 +201,13 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 		/* fetch it back (compressed marker will get set automatically) */
 		preslice = toast_fetch_datum(attr);
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		return heap_tuple_untoast_attr_slice(redirect.pointer,
+											 sliceoffset, slicelength);
+	}
 	else
 		preslice = attr;
 
@@ -267,7 +271,7 @@ toast_raw_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_TOAST(attr))
 	{
 		/* va_rawsize is the size of the original datum -- including header */
 		struct varatt_external toast_pointer;
@@ -275,6 +279,13 @@ toast_raw_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_rawsize;
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_COMPRESSED(attr))
 	{
 		/* here, va_rawsize is just the payload size */
@@ -308,7 +319,7 @@ toast_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_TOAST(attr))
 	{
 		/*
 		 * Attribute is stored externally - return the extsize whether
@@ -320,6 +331,13 @@ toast_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_extsize;
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_SHORT(attr))
 	{
 		result = VARSIZE_SHORT(attr);
@@ -386,12 +404,56 @@ toast_delete(Relation rel, HeapTuple oldtup)
 		{
 			Datum		value = toast_values[i];
 
-			if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value)))
+			if (toast_isnull[i])
+				continue;
+			else if (VARATT_IS_EXTERNAL_TOAST(PointerGetDatum(value)))
 				toast_delete_datum(rel, value);
+			else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
+				elog(ERROR, "cannot delete tuples with indirect toast tuples for now");
 		}
 	}
 }
 
+/* ----------
+ * toast_datum_differs -
+ *
+ *  Determine whether two toasted datums are the same and don't have to be
+ *  stored again.
+ * ----------
+ */
+static bool
+toast_datum_differs(struct varlena *old_value, struct varlena *new_value)
+{
+	Assert(VARATT_IS_EXTERNAL(old_value));
+	Assert(VARATT_IS_EXTERNAL(new_value));
+
+	/* fast path for the common case where we have the toast oid available */
+	if (VARATT_IS_EXTERNAL_TOAST(old_value) &&
+		VARATT_IS_EXTERNAL_TOAST(new_value))
+		return memcmp((char *) old_value, (char *) new_value,
+					  VARSIZE_EXTERNAL(old_value)) == 0;
+
+	/*
+	 * compare size of tuples, so we don't uselessly detoast/decompress tuples
+	 * if they can't be the same anyway.
+	 */
+	if (toast_raw_datum_size(PointerGetDatum(old_value)) !=
+		toast_raw_datum_size(PointerGetDatum(new_value)))
+		return false;
+
+	old_value = heap_tuple_untoast_attr(old_value);
+	new_value = heap_tuple_untoast_attr(new_value);
+
+	Assert(!VARATT_IS_EXTERNAL(old_value));
+	Assert(!VARATT_IS_EXTERNAL(new_value));
+	Assert(!VARATT_IS_COMPRESSED(old_value));
+	Assert(!VARATT_IS_COMPRESSED(new_value));
+	Assert(VARSIZE_ANY_EXHDR(old_value) == VARSIZE_ANY_EXHDR(new_value));
+
+	/* compare payload, we're fine with unaligned data */
+	return memcmp(VARDATA_ANY(old_value), VARDATA_ANY(new_value),
+				  VARSIZE_ANY_EXHDR(old_value)) == 0;
+}
 
 /* ----------
  * toast_insert_or_update -
@@ -495,8 +557,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				VARATT_IS_EXTERNAL(old_value))
 			{
 				if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
-					memcmp((char *) old_value, (char *) new_value,
-						   VARSIZE_EXTERNAL(old_value)) != 0)
+					toast_datum_differs(old_value, new_value))
 				{
 					/*
 					 * The old external stored value isn't needed any more
@@ -1256,6 +1317,8 @@ toast_save_datum(Relation rel, Datum value,
 	int32		data_todo;
 	Pointer		dval = DatumGetPointer(value);
 
+	Assert(!VARATT_IS_EXTERNAL(value));
+
 	/*
 	 * Open the toast relation and its index.  We can use the index to check
 	 * uniqueness of the OID we assign to the toasted item, even though it has
@@ -1339,7 +1402,7 @@ toast_save_datum(Relation rel, Datum value,
 		{
 			struct varatt_external old_toast_pointer;
 
-			Assert(VARATT_IS_EXTERNAL(oldexternal));
+			Assert(VARATT_IS_EXTERNAL_TOAST(oldexternal));
 			/* Must copy to access aligned fields */
 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -1481,6 +1544,8 @@ toast_delete_datum(Relation rel, Datum value)
 	if (!VARATT_IS_EXTERNAL(attr))
 		return;
 
+	Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1606,6 +1671,16 @@ toast_fetch_datum(struct varlena * attr)
 	char	   *chunkdata;
 	int32		chunksize;
 
+	StaticAssertStmt(sizeof(varatt_indirect) != sizeof(varatt_external),
+					 "indiscernible external/indirect toast tuples");
+
+	if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		return (struct varlena *)redirect.pointer;
+	}
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1773,7 +1848,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 	int32		chcpystrt;
 	int32		chcpyend;
 
-	Assert(VARATT_IS_EXTERNAL(attr));
+	Assert(VARATT_IS_EXTERNAL_TOAST(attr));
 
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 9808bdc..842fe9d 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -54,23 +54,36 @@
  */
 
 /*
- * struct varatt_external is a "TOAST pointer", that is, the information
- * needed to fetch a stored-out-of-line Datum.	The data is compressed
- * if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct must not
- * contain any padding, because we sometimes compare pointers using memcmp.
+ * struct varatt_external is a "TOAST pointer", that is, the information needed
+ * to fetch a Datum stored in an out-of-line on-disk Datum. The data is
+ * compressed if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct
+ * must not contain any padding, because we sometimes compare pointers using
+ * memcmp.
  *
  * Note that this information is stored unaligned within actual tuples, so
  * you need to memcpy from the tuple into a local struct variable before
  * you can look at these fields!  (The reason we use memcmp is to avoid
  * having to do that just to detect equality of two TOAST pointers...)
  */
-struct varatt_external
+typedef struct varatt_external
 {
 	int32		va_rawsize;		/* Original data size (includes header) */
 	int32		va_extsize;		/* External saved size (doesn't) */
 	Oid			va_valueid;		/* Unique ID of value within TOAST table */
 	Oid			va_toastrelid;	/* RelID of TOAST table containing it */
-};
+} varatt_external;
+
+/*
+ * Out-of-line Datum thats stored in memory in contrast to varatt_external
+ * pointers which points to data in an external toast relation.
+ *
+ * Note that just as varatt_external's this is stored unaligned within the
+ * tuple.
+ */
+typedef struct varatt_indirect
+{
+	struct varlena *pointer;	/* Pointer to in-memory varlena */
+} varatt_indirect;
 
 /*
  * These structs describe the header of a varlena object that may have been
@@ -106,7 +119,7 @@ typedef struct
 {
 	uint8		va_header;		/* Always 0x80 or 0x01 */
 	uint8		va_len_1be;		/* Physical length of datum */
-	char		va_data[1];		/* Data (for now always a TOAST pointer) */
+	char		va_data[1];		/* Data (pointer to either TOAST or memory) */
 } varattrib_1b_e;
 
 /*
@@ -130,6 +143,9 @@ typedef struct
  * first byte.	Also, it is not possible for a 1-byte length word to be zero;
  * this lets us disambiguate alignment padding bytes from the start of an
  * unaligned datum.  (We now *require* pad bytes to be filled with zero!)
+ *
+ * In TOAST datums the length field in varattrib_1b_e is used to discern
+ * whether its an indirection pointer or more commonly a on-disk tuple.
  */
 
 /*
@@ -217,6 +233,12 @@ typedef struct
 
 #define VARHDRSZ_EXTERNAL		2
 
+/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
+#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
+
+/* Size of an EXTERNAL datum that contains a in-memory TOAST pointer */
+#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect))
+
 #define VARDATA_4B(PTR)		(((varattrib_4b *) (PTR))->va_4byte.va_data)
 #define VARDATA_4B_C(PTR)	(((varattrib_4b *) (PTR))->va_compressed.va_data)
 #define VARDATA_1B(PTR)		(((varattrib_1b *) (PTR))->va_data)
@@ -254,6 +276,10 @@ typedef struct
 
 #define VARATT_IS_COMPRESSED(PTR)			VARATT_IS_4B_C(PTR)
 #define VARATT_IS_EXTERNAL(PTR)				VARATT_IS_1B_E(PTR)
+#define VARATT_IS_EXTERNAL_TOAST(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARSIZE_EXTERNAL(PTR) == TOAST_POINTER_SIZE)
+#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARSIZE_EXTERNAL(PTR) == INDIRECT_POINTER_SIZE)
 #define VARATT_IS_SHORT(PTR)				VARATT_IS_1B(PTR)
 #define VARATT_IS_EXTENDED(PTR)				(!VARATT_IS_4B_U(PTR))
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to