From 240f738907b28912f177ef95906ec5465589fc8a Mon Sep 17 00:00:00 2001
From: Hou Zhijie <houzj.fnst@cn.fujitsu.com>
Date: Wed, 4 Dec 2024 15:13:03 +0800
Subject: [PATCH v2] Fix memory leak in pgoutput with publication list cache

The pgoutput module caches publication names in a list and frees the list upon
invalidation but forgot to free the actual publication names within the list
elements.

Additionally, these cached publication names are allocated under the
CacheMemoryContext, which does not get explicitly freed at the end of decoding.
While this is not an issue for logical decoding in walsender, as the process
exits at the end of decoding, it poses a memory leak risk when decoding via SQL
APIs like pg_logical_slot_peek_changes. Since these APIs creates a separate
decoding context with each execution, failing to free the publication names
results in a memory leak.

To address this, we create a separate memory context within the logical
decoding context and reset it each time the publication names cache is
invalidated. This ensures that the lifespan of the publication names aligns
with that of the logical decoding context.
---
 src/backend/replication/pgoutput/pgoutput.c | 19 ++++++++++---------
 src/include/replication/pgoutput.h          |  2 ++
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 5e23453f07..b50b3d62e3 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -436,6 +436,10 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
 										   "logical replication cache context",
 										   ALLOCSET_DEFAULT_SIZES);
 
+	data->pubctx = AllocSetContextCreate(ctx->context,
+										 "logical replication publication list context",
+										 ALLOCSET_SMALL_SIZES);
+
 	ctx->output_plugin_private = data;
 
 	/* This plugin uses binary protocol. */
@@ -1734,9 +1738,9 @@ pgoutput_origin_filter(LogicalDecodingContext *ctx,
 /*
  * Shutdown the output plugin.
  *
- * Note, we don't need to clean the data->context and data->cachectx as
- * they are child contexts of the ctx->context so they will be cleaned up by
- * logical decoding machinery.
+ * Note, we don't need to clean the data->context, data->cachectx, and
+ * data->pubctx as they are child contexts of the ctx->context so they
+ * will be cleaned up by logical decoding machinery.
  */
 static void
 pgoutput_shutdown(LogicalDecodingContext *ctx)
@@ -2063,12 +2067,9 @@ get_rel_sync_entry(PGOutputData *data, Relation relation)
 		/* Reload publications if needed before use. */
 		if (!publications_valid)
 		{
-			oldctx = MemoryContextSwitchTo(CacheMemoryContext);
-			if (data->publications)
-			{
-				list_free_deep(data->publications);
-				data->publications = NIL;
-			}
+			MemoryContextReset(data->pubctx);
+
+			oldctx = MemoryContextSwitchTo(data->pubctx);
 			data->publications = LoadPublications(data->publication_names);
 			MemoryContextSwitchTo(oldctx);
 			publications_valid = true;
diff --git a/src/include/replication/pgoutput.h b/src/include/replication/pgoutput.h
index 89f94e1147..93e66eaa5d 100644
--- a/src/include/replication/pgoutput.h
+++ b/src/include/replication/pgoutput.h
@@ -20,6 +20,8 @@ typedef struct PGOutputData
 	MemoryContext context;		/* private memory context for transient
 								 * allocations */
 	MemoryContext cachectx;		/* private memory context for cache data */
+	MemoryContext pubctx;		/* private memory context for
+								 * publication_names */
 
 	bool		in_streaming;	/* true if we are streaming a chunk of
 								 * transaction */
-- 
2.31.1

