Paul Ramsey <pram...@cleverelephant.ca> 于2019年7月2日周二 下午10:46写道:

> This looks good to me. A little commentary around why
> pglz_maximum_compressed_size() returns a universally correct answer
> (there's no way the compressed size can ever be larger than this
> because...) would be nice for peasants like myself.
>
> If you're looking to continue down this code line in your next patch,
> the next TODO item is a little more involved: a user-land (ala
> PG_DETOAST_DATUM) iterator API for access of TOAST datums would allow
> the optimization of searching of large objects like JSONB types, and
> so on, where the thing you are looking for is not at a known location
> in the object. So, things like looking for a particular substring in a
> string, or looking for a particular key in a JSONB. "Iterate until you
> find the thing." would allow optimization of some code lines that
> currently require full decompression of the objects.
>
> P.
>

Thanks for your comments. I've updated the patch.
As for the iterator API, I have actually already implemented a de-TOAST iterator [0].
I'm now looking for more use cases for it and polishing it.
Any comments would be much appreciated.

Best Regards, Binguo Bao.

[0]
https://www.postgresql.org/message-id/flat/cal-ogks_onzpc9m9bxpcztmofwulcfkyecekiagxzwrl8kx...@mail.gmail.com
From 2e4e2838937ec6fa1404fe529e7ed303e391d1b2 Mon Sep 17 00:00:00 2001
From: BBG <djydew...@gmail.com>
Date: Sun, 2 Jun 2019 19:18:46 +0800
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/heap/tuptoaster.c | 24 +++++++++++++++++-------
 src/common/pg_lzcompress.c           | 26 ++++++++++++++++++++++++++
 src/include/common/pg_lzcompress.h   |  1 +
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..684f1b2 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -266,6 +266,7 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		struct varatt_external toast_pointer;
+		int32 max_size;
 
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -273,8 +274,13 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
 
-		/* fetch it back (compressed marker will get set automatically) */
-		preslice = toast_fetch_datum(attr);
+		max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
+												toast_pointer.va_rawsize);
+		/*
+		 * Fetch only as much of the compressed data as the slice can need;
+		 * the compressed marker is set automatically on the result.
+		 */
+		preslice = toast_fetch_datum_slice(attr, 0, max_size);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -2031,7 +2037,8 @@ toast_fetch_datum(struct varlena *attr)
  *	Reconstruct a segment of a Datum from the chunks saved
  *	in the toast relation
  *
- *	Note that this function only supports non-compressed external datums.
+ *	Note that this function supports non-compressed external datums
+ *	and compressed external datum slices at the start of the object.
  * ----------
  */
 static struct varlena *
@@ -2072,10 +2079,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 	/*
-	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
-	 * we can't return a compressed datum which is meaningful to toast later
+	 * It's meaningful to fetch slices at the start of a compressed datum.
 	 */
-	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
 
 	attrsize = toast_pointer.va_extsize;
 	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -2091,7 +2097,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 
 	result = (struct varlena *) palloc(length + VARHDRSZ);
 
-	SET_VARSIZE(result, length + VARHDRSZ);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) {
+		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+	} else {
+		SET_VARSIZE(result, length + VARHDRSZ);
+	}
 
 	if (length == 0)
 		return result;			/* Can save a lot of work at this point! */
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b398..80ed17a 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,29 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 	 */
 	return (char *) dp - dest;
 }
+
+
+
+/* ----------
+ * pglz_maximum_compressed_size -
+ *
+ *		Return an upper bound on the number of compressed bytes needed to
+ *		decompress the first raw_slice_size bytes of a datum: roughly 9/8
+ *		of raw_slice_size, but never more than raw_size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size)
+{
+	int64		result;
+
+	/*
+	 * Keep the whole calculation in int64.  Casting to int32 before the
+	 * division would truncate the product once raw_slice_size * 9 + 8
+	 * exceeds INT32_MAX, yielding a bogus (possibly negative) bound.
+	 */
+	result = ((int64) raw_slice_size * 9 + 8) / 8;
+
+	/* Clamp to raw_size; this keeps a non-negative result within int32. */
+	return (int32) (result > raw_size ? raw_size : result);
+}
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index 5555764..cda3e1d 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 						   const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
 							 int32 rawsize, bool check_complete);
+extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size);
 
 #endif							/* _PG_LZCOMPRESS_H_ */
-- 
2.7.4

Reply via email to