From 79d501b31c967e2e01424ba10cd0d0e14d175c08 Mon Sep 17 00:00:00 2001
From: Nikhil Kumar Veldanda <nikhilkv@amazon.com>
Date: Mon, 21 Apr 2025 13:12:30 +0000
Subject: [PATCH v13 8/8] initial draft to address datum leak problem

---
 src/backend/access/heap/heapam.c              |   3 +-
 src/backend/access/table/toast_helper.c       | 227 +++++++++++++++++-
 src/backend/catalog/pg_zstd_dictionaries.c    |   4 +-
 src/test/regress/expected/compression.out     |   8 +-
 .../regress/expected/compression_zstd_1.out   |  60 ++---
 5 files changed, 264 insertions(+), 38 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c1a4de14a59..0348161432a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2278,7 +2278,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 		Assert(!HeapTupleHasExternal(tup));
 		return tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	else if (HeapTupleHasExternal(tup) || HeapTupleHasVarWidth(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
 		return heap_toast_insert_or_update(relation, tup, NULL, options);
 	else
 		return tup;
@@ -3776,6 +3776,7 @@ l2:
 	else
 		need_toast = (HeapTupleHasExternal(&oldtup) ||
 					  HeapTupleHasExternal(newtup) ||
+					  HeapTupleHasVarWidth(newtup) ||
 					  newtup->t_len > TOAST_TUPLE_THRESHOLD);
 
 	pagefree = PageGetHeapFreeSpace(page);
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index f4b1cbe494e..2a90ebf77c9 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -19,7 +19,17 @@
 #include "access/toast_internals.h"
 #include "catalog/pg_type_d.h"
 #include "varatt.h"
-
+#include "utils/lsyscache.h"
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rangetypes.h"
+#include "utils/multirangetypes.h"
+#include "utils/typcache.h"
+#include "miscadmin.h"
+
+static Datum flatten_datum(Datum value, Oid typid);
 
 /*
  * Prepare to TOAST a tuple.
@@ -151,6 +161,25 @@ toast_tuple_init(ToastTupleContext *ttc)
 				ttc->ttc_flags |= (TOAST_NEEDS_CHANGE | TOAST_NEEDS_FREE);
 			}
 
+			if (!(IsCatalogNamespace(ttc->ttc_rel->rd_rel->relnamespace) || IsToastNamespace(ttc->ttc_rel->rd_rel->relnamespace)))
+			{
+				if (!VARATT_IS_SHORT(new_value))
+				{
+					Datum		oldd = PointerGetDatum(new_value);
+					Datum		clean = flatten_datum(oldd, att->atttypid);
+
+					if (DatumGetPointer(clean) != DatumGetPointer(oldd))
+					{
+						if (ttc->ttc_attr[i].tai_oldexternal != NULL)
+							pfree(new_value);
+						new_value = (struct varlena *) DatumGetPointer(clean);
+						ttc->ttc_values[i] = clean;
+						ttc->ttc_attr[i].tai_colflags |= TOASTCOL_NEEDS_FREE;
+						ttc->ttc_flags |= (TOAST_NEEDS_CHANGE | TOAST_NEEDS_FREE);
+					}
+				}
+			}
+
 			/*
 			 * Remember the size of this attribute
 			 */
@@ -346,3 +375,199 @@ toast_delete_external(Relation rel, const Datum *values, const bool *isnull,
 		}
 	}
 }
+
+static Datum
+flatten_datum(Datum value, Oid typid)
+{
+	Oid			basetypid;
+	char		typtype;
+
+	/* initialize at top of this block */
+	basetypid = getBaseType(typid); /* Get basetype for Domain. */
+	typtype = get_typtype(basetypid);
+
+	/* ---------- BASE / ENUM / PSEUDO ----------------------- */
+	if (typtype == TYPTYPE_BASE ||
+		typtype == TYPTYPE_ENUM ||
+		typtype == TYPTYPE_PSEUDO)
+	{
+		if (get_typlen(basetypid) > 0 ||
+			get_typbyval(basetypid))
+			return value;		/* fixed‑len or pass‑by‑val */
+
+		/* ---------- ARRAY ------------------------------------------------ */
+		if (type_is_array(typid))
+		{
+			ArrayType  *arr;
+			TypeCacheEntry *elemCache;
+			Datum	   *elems;
+			bool	   *nulls;
+			int			nitems;
+			int			i;
+			ArrayType  *new_arr;
+
+			arr = DatumGetArrayTypeP(value);
+			elemCache = lookup_type_cache(ARR_ELEMTYPE(arr), 0);
+
+			if (elemCache->typbyval || elemCache->typlen > 0)
+				return PointerGetDatum(arr);
+
+			deconstruct_array(arr,
+							  ARR_ELEMTYPE(arr),
+							  elemCache->typlen,
+							  elemCache->typbyval,
+							  elemCache->typalign,
+							  &elems, &nulls, &nitems);
+
+			if (nitems == 0)
+				return PointerGetDatum(arr);
+
+			for (i = 0; i < nitems; i++)
+			{
+				if (!nulls[i])
+					elems[i] = flatten_datum(elems[i],
+											 ARR_ELEMTYPE(arr));
+			}
+
+			new_arr = construct_md_array(elems, nulls,
+										 ARR_NDIM(arr),
+										 ARR_DIMS(arr),
+										 ARR_LBOUND(arr),
+										 ARR_ELEMTYPE(arr),
+										 elemCache->typlen,
+										 elemCache->typbyval,
+										 elemCache->typalign);
+
+			pfree(elems);
+			pfree(nulls);
+
+			return PointerGetDatum(new_arr);
+		}
+
+		return PointerGetDatum(PG_DETOAST_DATUM(value));
+	}
+
+	/* ---------- COMPOSITE ------------------------------------------- */
+	if (typtype == TYPTYPE_COMPOSITE)
+	{
+		HeapTupleHeader hdr;
+		TupleDesc	td;
+		int			natts;
+		Datum	   *vals;
+		bool	   *nulls;
+		HeapTupleData t;
+		int			i;
+		Form_pg_attribute att;
+		HeapTuple	newt;
+		HeapTupleHeader copy;
+
+		hdr = DatumGetHeapTupleHeader(value);
+
+		if (!(hdr->t_infomask & HEAP_HASVARWIDTH))
+			return PointerGetDatum(hdr);
+
+		td = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(hdr),
+									HeapTupleHeaderGetTypMod(hdr));
+		natts = td->natts;
+
+		vals = palloc(sizeof(Datum) * natts);
+		nulls = palloc(sizeof(bool) * natts);
+
+		t.t_len = VARSIZE(hdr); /* ⚑ portable */
+		t.t_tableOid = InvalidOid;
+		t.t_data = hdr;
+		ItemPointerSetInvalid(&t.t_self);
+
+		heap_deform_tuple(&t, td, vals, nulls);
+
+		for (i = 0; i < natts; i++)
+		{
+			att = TupleDescAttr(td, i);
+			if (att->attisdropped || att->atthasmissing)
+				continue;
+			if (!nulls[i] && att->attlen == -1)
+				vals[i] = flatten_datum(vals[i], att->atttypid);
+		}
+
+		newt = heap_form_tuple(td, vals, nulls);
+		copy = (HeapTupleHeader) palloc(newt->t_len);
+		memcpy(copy, newt->t_data, newt->t_len);
+
+		ReleaseTupleDesc(td);
+		pfree(vals);
+		pfree(nulls);
+		heap_freetuple(newt);
+
+		return PointerGetDatum(copy);
+	}
+
+	/* ---------- RANGE ----------------------------------------------- */
+	if (typtype == TYPTYPE_RANGE)
+	{
+		RangeType  *r;
+		TypeCacheEntry *tc;
+		RangeBound	l;
+		RangeBound	u;
+		bool		empty;
+
+		r = DatumGetRangeTypeP(value);
+		tc = lookup_type_cache(basetypid, TYPECACHE_RANGE_INFO);
+
+		range_deserialize(tc, r, &l, &u, &empty);
+
+		if (!empty &&
+			!(tc->rngelemtype->typbyval ||
+			  tc->rngelemtype->typlen > 0))
+		{
+			if (!l.infinite)
+				l.val = flatten_datum(l.val,
+									  tc->rngelemtype->type_id);
+			if (!u.infinite)
+				u.val = flatten_datum(u.val,
+									  tc->rngelemtype->type_id);
+			return PointerGetDatum(make_range(tc, &l, &u, empty, NULL));
+		}
+
+		return PointerGetDatum(r);
+	}
+
+	/* ---------- MULTIRANGE ---------------------------- */
+	if (typtype == TYPTYPE_MULTIRANGE)
+	{
+		MultirangeType *mr;
+		TypeCacheEntry *tc;
+		int32		rangeCount;
+		RangeType **ranges;
+		RangeType **out;
+		MultirangeType *newmr;
+		int			i;
+
+		mr = DatumGetMultirangeTypeP(value);
+		tc = lookup_type_cache(basetypid,
+							   TYPECACHE_MULTIRANGE_INFO);
+
+		multirange_deserialize(tc->rngtype, mr,
+							   &rangeCount, &ranges);
+
+		out = palloc(sizeof(RangeType *) * rangeCount);
+
+		for (i = 0; i < rangeCount; i++)
+		{
+			out[i] = DatumGetRangeTypeP(
+										flatten_datum(PointerGetDatum(ranges[i]),
+													  RangeTypeGetOid(ranges[i])));
+		}
+
+		newmr = make_multirange(MultirangeTypeGetOid(mr),
+								tc->rngtype,
+								rangeCount, out);
+		pfree(out);
+
+		return PointerGetDatum(newmr);
+	}
+
+	elog(ERROR, "flatten_datum: unsupported type %u", typid);
+	pg_unreachable();
+	/* keep compiler happy: */
+	return (Datum) 0;
+}
diff --git a/src/backend/catalog/pg_zstd_dictionaries.c b/src/backend/catalog/pg_zstd_dictionaries.c
index 08a6883ecd4..63c2a34190a 100644
--- a/src/backend/catalog/pg_zstd_dictionaries.c
+++ b/src/backend/catalog/pg_zstd_dictionaries.c
@@ -410,7 +410,7 @@ range_typzstdsampling(PG_FUNCTION_ARGS)
 	bool		empty;
 
 	/* Get information about range type; note column might be a domain */
-	TypeCacheEntry *typcache = range_get_typcache(fcinfo, getBaseType(range->rangetypid));
+	TypeCacheEntry *typcache = range_get_typcache(fcinfo, RangeTypeGetOid(range));
 
 	/* If the type does not supply a builder, skip */
 	if (!OidIsValid(typcache->rngelemtype->typzstdsampling))
@@ -436,7 +436,7 @@ multirange_typzstdsampling(PG_FUNCTION_ARGS)
 	RangeType **ranges;
 
 	/* Get information about multirange type; note column might be a domain */
-	TypeCacheEntry *typcache = multirange_get_typcache(fcinfo, getBaseType(mrange->multirangetypid));
+	TypeCacheEntry *typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mrange));
 
 	/* If the type does not supply a builder, skip */
 	if (!OidIsValid(typcache->rngtype->typzstdsampling))
diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out
index 94495388ade..29ef885e516 100644
--- a/src/test/regress/expected/compression.out
+++ b/src/test/regress/expected/compression.out
@@ -69,7 +69,7 @@ SELECT pg_column_compression(f1) FROM cmmove3;
  pg_column_compression 
 -----------------------
  pglz
- lz4
+ pglz
 (2 rows)
 
 -- test LIKE INCLUDING COMPRESSION
@@ -97,7 +97,7 @@ UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1;
 SELECT pg_column_compression(f1) FROM cmmove2;
  pg_column_compression 
 -----------------------
- lz4
+ pglz
 (1 row)
 
 -- test externally stored compressed data
@@ -200,8 +200,8 @@ SELECT pg_column_compression(f1) FROM cmdata1;
 SELECT pg_column_compression(x) FROM compressmv;
  pg_column_compression 
 -----------------------
- lz4
- lz4
+ pglz
+ pglz
 (2 rows)
 
 -- test compression with partition
diff --git a/src/test/regress/expected/compression_zstd_1.out b/src/test/regress/expected/compression_zstd_1.out
index c1a7936e574..260b9fd2b17 100644
--- a/src/test/regress/expected/compression_zstd_1.out
+++ b/src/test/regress/expected/compression_zstd_1.out
@@ -139,36 +139,36 @@ SELECT pg_column_compression(f1) AS mv_compression
 FROM compressmv_zstd;
  mv_compression 
 ----------------
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
- zstd
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
+ pglz
 (30 rows)
 
 SELECT objid::regclass, refobjid from pg_depend where refclassid = 9946;
-- 
2.47.1

