From e62f7a1b15dc58a081a1c724a8207526b7037bf0 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Sat, 10 Feb 2024 00:57:19 +0100
Subject: [PATCH v3 6/7] nodeToString: Apply RLE on Bitmapset and numeric List
 types

This reduces the size of full serialized queries in pg_rewrite by several %,
reducing overhead in the system.
---
 src/backend/nodes/outfuncs.c  | 96 +++++++++++++++++++++++++++++++----
 src/backend/nodes/read.c      | 53 +++++++++++++++++--
 src/backend/nodes/readfuncs.c | 17 ++++++-
 3 files changed, 150 insertions(+), 16 deletions(-)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 1a17eafd57..f135753fb4 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -378,23 +378,64 @@ writeNodeArray(StringInfo str, const Node *const *arr, int len,
 
 /*
  * Print a List.
+ *
+ * Notes:
+ * NodeList is formatted as (<node1> <node2> ...)
+ *
+ * OidList is formatted as (o int +int int int ...), with +int indicating N
+ * successive identical values.
+ *
+ * IntList and XidList are formatted as (i/x int +int int int ...), with +int
+ * indicating N successively increasing values. (i 9 +3) is thus equivalent
+ * to (i 9 10 11 12).
  */
 static void
 _outList(StringInfo str, const List *node, bool omitLocation)
 {
 	const ListCell *lc;
+	union LCSurrogate {
+		int32 i;
+		Oid o;
+		TransactionId x;
+	}				previous = { .i = 0};
+	int				run = 0;
+	int				run_delta;
+	const char	   *fmt;
 
 	appendStringInfoChar(str, '(');
 
 	if (IsA(node, IntList))
+	{
 		appendStringInfoChar(str, 'i');
+		fmt = " %d";
+		run_delta = 1;
+	}
 	else if (IsA(node, OidList))
+	{
 		appendStringInfoChar(str, 'o');
+		fmt = " %u";
+		run_delta = 0;
+	}
 	else if (IsA(node, XidList))
+	{
 		appendStringInfoChar(str, 'x');
+		fmt = " %u";
+		run_delta = 1;
+	}
+	else if (IsA(node, List))
+	{
+		/* silence the compiler about uninitialized variables */
+		fmt = "";
+		run_delta = 0;
+	}
+	else
+		elog(ERROR, "unrecognized list node type: %d",
+			 (int) node->type);
 
 	foreach(lc, node)
 	{
+		union LCSurrogate val = {.i = 0};
+
 		/*
 		 * For the sake of backward compatibility, we emit a slightly
 		 * different whitespace format for lists of nodes vs. other types of
@@ -405,26 +446,41 @@ _outList(StringInfo str, const List *node, bool omitLocation)
 			outNode(str, lfirst(lc), omitLocation);
 			if (lnext(node, lc))
 				appendStringInfoChar(str, ' ');
+			continue;
 		}
 		else if (IsA(node, IntList))
-			appendStringInfo(str, " %d", lfirst_int(lc));
+			val.i = lfirst_int(lc);
 		else if (IsA(node, OidList))
-			appendStringInfo(str, " %u", lfirst_oid(lc));
+			val.o = lfirst_oid(lc);
 		else if (IsA(node, XidList))
-			appendStringInfo(str, " %u", lfirst_xid(lc));
+			val.x = lfirst_xid(lc);
+
+		if (val.i == previous.i + run_delta)
+			run += 1;
 		else
-			elog(ERROR, "unrecognized list node type: %d",
-				 (int) node->type);
+		{
+			if (run > 0)
+				appendStringInfo(str, " +%d", run);
+			run = 0;
+			appendStringInfo(str, fmt, val.i);
+		}
+		previous = val;
 	}
 
-	appendStringInfoChar(str, ')');
-}
+	if (run > 0)
+		appendStringInfo(str, " +%d", run);
+
+	appendStringInfoChar(str, ')');}
 
 /*
  * outBitmapset -
  *	   converts a bitmap set of integers
  *
- * Note: the output format is "(b int int ...)", similar to an integer List.
+ * Note: the output format is "(b int int +int ...)", similar to an
+ * integer List. Note that consecutive runs of incremental values are
+ * indicated by [... int +int ...], where the first int is the first set bit,
+ * and the int that's tagged with the '+' sign that follows is the number of
+ * subsequent set bits (resulting in a run of N+1 set bits).
  *
  * We export this function for use by extensions that define extensible nodes.
  * That's somewhat historical, though, because calling outNode() will work.
@@ -432,13 +488,31 @@ _outList(StringInfo str, const List *node, bool omitLocation)
 void
 outBitmapset(StringInfo str, const Bitmapset *bms)
 {
-	int			x;
+	int			x = 0;
+	int			prev = -2;
+	int			run = 0;
 
 	appendStringInfoChar(str, '(');
 	appendStringInfoChar(str, 'b');
-	x = -1;
+
 	while ((x = bms_next_member(bms, x)) >= 0)
-		appendStringInfo(str, " %d", x);
+	{
+		if (x - prev == 1)
+			run++;
+		else
+		{
+			if (run != 0)
+				appendStringInfo(str, " +%d", run);
+
+			run = 0;
+			appendStringInfo(str, " %d", x);
+		}
+		prev = x;
+	}
+
+	if (run > 0)
+		appendStringInfo(str, " +%d", run);
+
 	appendStringInfoChar(str, ')');
 }
 
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index d9dcb5865a..34dac773d8 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -403,6 +403,8 @@ nodeRead(const char *token, int tok_len)
 					elog(ERROR, "unterminated List structure");
 				if (tok_len == 1 && token[0] == 'i')
 				{
+					int		prev = 0;
+
 					/* List of integers */
 					for (;;)
 					{
@@ -418,12 +420,23 @@ nodeRead(const char *token, int tok_len)
 						if (endptr != token + tok_len)
 							elog(ERROR, "unrecognized integer: \"%.*s\"",
 								 tok_len, token);
-						l = lappend_int(l, val);
+
+						if (token[0] == '+')
+						{
+							for (int i = 0; i < val; i++)
+								l = lappend_int(l, ++prev);
+						}
+						else
+						{
+							prev = val;
+							l = lappend_int(l, val);
+						}
 					}
 					result = (Node *) l;
 				}
 				else if (tok_len == 1 && token[0] == 'o')
 				{
+					Oid		prev = 0;
 					/* List of OIDs */
 					for (;;)
 					{
@@ -439,12 +452,23 @@ nodeRead(const char *token, int tok_len)
 						if (endptr != token + tok_len)
 							elog(ERROR, "unrecognized OID: \"%.*s\"",
 								 tok_len, token);
-						l = lappend_oid(l, val);
+
+						if (token[0] == '+')
+						{
+							for (int i = 0; i < val; i++)
+								l = lappend_oid(l, prev);
+						}
+						else
+						{
+							prev = val;
+							l = lappend_oid(l, val);
+						}
 					}
 					result = (Node *) l;
 				}
 				else if (tok_len == 1 && token[0] == 'x')
 				{
+					TransactionId	prev = 0;
 					/* List of TransactionIds */
 					for (;;)
 					{
@@ -460,7 +484,17 @@ nodeRead(const char *token, int tok_len)
 						if (endptr != token + tok_len)
 							elog(ERROR, "unrecognized Xid: \"%.*s\"",
 								 tok_len, token);
-						l = lappend_xid(l, val);
+
+						if (token[0] == '+')
+						{
+							for (int i = 0; i < val; i++)
+								l = lappend_xid(l, ++prev);
+						}
+						else
+						{
+							prev = val;
+							l = lappend_xid(l, val);
+						}
 					}
 					result = (Node *) l;
 				}
@@ -468,6 +502,7 @@ nodeRead(const char *token, int tok_len)
 				{
 					/* Bitmapset -- see also _readBitmapset() */
 					Bitmapset  *bms = NULL;
+					int			prev = -2;
 
 					for (;;)
 					{
@@ -483,7 +518,17 @@ nodeRead(const char *token, int tok_len)
 						if (endptr != token + tok_len)
 							elog(ERROR, "unrecognized integer: \"%.*s\"",
 								 tok_len, token);
-						bms = bms_add_member(bms, val);
+						
+						if (token[0] == '+')
+						{
+							for (int i = 0; i < val; i++)
+								bms = bms_add_member(bms, ++prev);
+						}
+						else
+						{
+							bms = bms_add_member(bms, val);
+							prev = val;
+						}
 					}
 					result = (Node *) bms;
 				}
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 4cdbad9e7e..55e40f5603 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -298,6 +298,7 @@ static Bitmapset *
 _readBitmapset(void)
 {
 	Bitmapset  *result = NULL;
+	int			prev = -2;
 
 	READ_TEMP_LOCALS();
 
@@ -326,7 +327,21 @@ _readBitmapset(void)
 		val = (int) strtol(token, &endptr, 10);
 		if (endptr != token + length)
 			elog(ERROR, "unrecognized integer: \"%.*s\"", length, token);
-		result = bms_add_member(result, val);
+
+		if (token[0] == '+')
+		{
+			Assert(prev >= 0);
+			for (int i = 0; i < val; i++)
+			{
+				prev++;
+				result = bms_add_member(result, prev);
+			}
+		}
+		else
+		{
+			result = bms_add_member(result, val);
+			prev = val;
+		}
 	}
 
 	return result;
-- 
2.40.1

