From 7466ff936bd0829a622125eb351c6df57c4c4f3d Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Wed, 3 Jan 2024 01:39:42 +0100
Subject: [PATCH v1 2/7] pg_node_tree: reset node->location before catalog's
 serialization

We don't store original query texts, so any lingering "location" value
can only be useful in forensic debugging. In normal operation, however,
a non-default value will show up as measurable overhead in
serialization, so we reset it to its default (-1), saving several
10s of kB.
---
 src/backend/catalog/heap.c           | 11 +++-
 src/backend/catalog/index.c          |  2 +
 src/backend/catalog/pg_attrdef.c     |  5 +-
 src/backend/catalog/pg_proc.c        |  6 ++
 src/backend/catalog/pg_publication.c |  4 ++
 src/backend/commands/policy.c        |  7 ++
 src/backend/commands/statscmds.c     |  1 +
 src/backend/commands/trigger.c       |  2 +
 src/backend/commands/typecmds.c      |  6 +-
 src/backend/nodes/nodeFuncs.c        | 95 ++++++++++++++++++++++++++++
 src/backend/rewrite/rewriteDefine.c  |  8 +++
 src/include/nodes/nodeFuncs.h        |  2 +
 12 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index b93894889d..419d4ab30e 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -2062,8 +2062,10 @@ StoreRelCheck(Relation rel, const char *ccname, Node *expr,
 	Oid			constrOid;
 
 	/*
-	 * Flatten expression to string form for storage.
+	 * Remove references into the (to be dropped) query string, and
+	 * flatten expression to string form for storage.
 	 */
+	reset_querytext_references(expr, NULL);
 	ccbin = nodeToString(expr);
 
 	/*
@@ -3676,6 +3678,7 @@ StorePartitionKey(Relation rel,
 	{
 		char	   *exprString;
 
+		reset_querytext_references((Node *) partexprs, NULL);
 		exprString = nodeToString(partexprs);
 		partexprDatum = CStringGetTextDatum(exprString);
 		pfree(exprString);
@@ -3834,6 +3837,12 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound)
 	memset(new_val, 0, sizeof(new_val));
 	memset(new_null, false, sizeof(new_null));
 	memset(new_repl, false, sizeof(new_repl));
+
+	/*
+	 * We don't keep the original query text around, so remove any
+	 * references to it to reduce its stored size.
+	 */
+	reset_querytext_references((Node *) bound, NULL);
 	new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound));
 	new_null[Anum_pg_class_relpartbound - 1] = false;
 	new_repl[Anum_pg_class_relpartbound - 1] = true;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 7b186c0220..e01ff9cf7f 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -588,6 +588,7 @@ UpdateIndexRelation(Oid indexoid,
 	{
 		char	   *exprsString;
 
+		reset_querytext_references((Node *) indexInfo->ii_Expressions, NULL);
 		exprsString = nodeToString(indexInfo->ii_Expressions);
 		exprsDatum = CStringGetTextDatum(exprsString);
 		pfree(exprsString);
@@ -603,6 +604,7 @@ UpdateIndexRelation(Oid indexoid,
 	{
 		char	   *predString;
 
+		reset_querytext_references((Node *) indexInfo->ii_Predicate, NULL);
 		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
 		predDatum = CStringGetTextDatum(predString);
 		pfree(predString);
diff --git a/src/backend/catalog/pg_attrdef.c b/src/backend/catalog/pg_attrdef.c
index ade0b6d8e6..49cb869e3c 100644
--- a/src/backend/catalog/pg_attrdef.c
+++ b/src/backend/catalog/pg_attrdef.c
@@ -23,6 +23,7 @@
 #include "catalog/objectaccess.h"
 #include "catalog/pg_attrdef.h"
 #include "executor/executor.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -62,8 +63,10 @@ StoreAttrDefault(Relation rel, AttrNumber attnum,
 	adrel = table_open(AttrDefaultRelationId, RowExclusiveLock);
 
 	/*
-	 * Flatten expression to string form for storage.
+	 * Reset any references to the original query text (which isn't stored),
+	 * and flatten the expression to string form for storage.
 	 */
+	reset_querytext_references(expr, NULL);
 	adbin = nodeToString(expr);
 
 	/*
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index b5fd364003..ca0d1f6174 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -331,7 +331,10 @@ ProcedureCreate(const char *procedureName,
 	else
 		nulls[Anum_pg_proc_proargnames - 1] = true;
 	if (parameterDefaults != NIL)
+	{
+		reset_querytext_references((Node *) parameterDefaults, NULL);
 		values[Anum_pg_proc_proargdefaults - 1] = CStringGetTextDatum(nodeToString(parameterDefaults));
+	}
 	else
 		nulls[Anum_pg_proc_proargdefaults - 1] = true;
 	if (trftypes != PointerGetDatum(NULL))
@@ -344,7 +347,10 @@ ProcedureCreate(const char *procedureName,
 	else
 		nulls[Anum_pg_proc_probin - 1] = true;
 	if (prosqlbody)
+	{
+		reset_querytext_references(prosqlbody, NULL);
 		values[Anum_pg_proc_prosqlbody - 1] = CStringGetTextDatum(nodeToString(prosqlbody));
+	}
 	else
 		nulls[Anum_pg_proc_prosqlbody - 1] = true;
 	if (proconfig != PointerGetDatum(NULL))
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index c488b6370b..d4b772e543 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -36,6 +36,7 @@
 #include "commands/publicationcmds.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/catcache.h"
@@ -422,7 +423,10 @@ publication_add_relation(Oid pubid, PublicationRelInfo *pri,
 
 	/* Add qualifications, if available */
 	if (pri->whereClause != NULL)
+	{
+		reset_querytext_references(pri->whereClause, NULL);
 		values[Anum_pg_publication_rel_prqual - 1] = CStringGetTextDatum(nodeToString(pri->whereClause));
+	}
 	else
 		nulls[Anum_pg_publication_rel_prqual - 1] = true;
 
diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c
index 76a45e56bf..ce01336ad3 100644
--- a/src/backend/commands/policy.c
+++ b/src/backend/commands/policy.c
@@ -30,6 +30,7 @@
 #include "commands/policy.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/pg_list.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_collate.h"
@@ -701,13 +702,19 @@ CreatePolicy(CreatePolicyStmt *stmt)
 
 	/* Add qual if present. */
 	if (qual)
+	{
+		reset_querytext_references(qual, NULL);
 		values[Anum_pg_policy_polqual - 1] = CStringGetTextDatum(nodeToString(qual));
+	}
 	else
 		isnull[Anum_pg_policy_polqual - 1] = true;
 
 	/* Add WITH CHECK qual if present */
 	if (with_check_qual)
+	{
+		reset_querytext_references(with_check_qual, NULL);
 		values[Anum_pg_policy_polwithcheck - 1] = CStringGetTextDatum(nodeToString(with_check_qual));
+	}
 	else
 		isnull[Anum_pg_policy_polwithcheck - 1] = true;
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 20be564df2..c9e85f0af8 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -477,6 +477,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	{
 		char	   *exprsString;
 
+		reset_querytext_references((Node *) stxexprs, NULL);
 		exprsString = nodeToString(stxexprs);
 		exprsDatum = CStringGetTextDatum(exprsString);
 		pfree(exprsString);
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 52177759ab..b1ffa8b17f 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -39,6 +39,7 @@
 #include "miscadmin.h"
 #include "nodes/bitmapset.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_collate.h"
@@ -674,6 +675,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString,
 		/* we'll need the rtable for recordDependencyOnExpr */
 		whenRtable = pstate->p_rtable;
 
+		reset_querytext_references(whenClause, NULL);
 		qual = nodeToString(whenClause);
 
 		free_parsestate(pstate);
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index aaf9728697..b76f1d1c9f 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -58,6 +58,7 @@
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
@@ -924,6 +925,7 @@ DefineDomain(CreateDomainStmt *stmt)
 						defaultValue =
 							deparse_expression(defaultExpr,
 											   NIL, false, false);
+						reset_querytext_references(defaultExpr, NULL);
 						defaultValueBin = nodeToString(defaultExpr);
 					}
 				}
@@ -3506,8 +3508,10 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid,
 				 errmsg("cannot use table references in domain check constraint")));
 
 	/*
-	 * Convert to string form for storage.
+	 * Remove location references into the original query string (which won't
+	 * be stored) and convert to string form for storage.
 	 */
+	reset_querytext_references(expr, NULL);
 	ccbin = nodeToString(expr);
 
 	/*
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index c03f4f23e2..44a922464b 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1903,6 +1903,101 @@ check_functions_in_node(Node *node, check_function_callback checker,
 	return false;
 }
 
+/*
+ * Reset all query text related fields to their original value.
+ *
+ * This is used for reducing the size of nodeToText, like in pg_rewrite.
+ */
+bool
+reset_querytext_references(Node *node, void *context)
+{
+	const int query_recurse_flags = 0;
+	ListCell   *temp;
+
+#define RESET_FOR(_type_) \
+	case T_##_type_: \
+	{ \
+		castNode(_type_, node)->location = -1; \
+		break; \
+	}
+
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Query))
+	{
+		Query *query = castNode(Query, node);
+		query->stmt_location = -1;
+		query->stmt_len = 0;
+
+		return query_tree_walker(query, reset_querytext_references, context,
+								 query_recurse_flags);
+	}
+
+	switch (nodeTag(node))
+	{
+	RESET_FOR(RangeVar);
+	RESET_FOR(TableFunc);
+	RESET_FOR(Var);
+	RESET_FOR(Const);
+	RESET_FOR(Param);
+	RESET_FOR(Aggref);
+	RESET_FOR(GroupingFunc);
+	RESET_FOR(WindowFunc);
+	RESET_FOR(FuncExpr);
+	RESET_FOR(NamedArgExpr);
+	RESET_FOR(OpExpr);
+	RESET_FOR(DistinctExpr);
+	RESET_FOR(NullIfExpr);
+	RESET_FOR(ScalarArrayOpExpr);
+	RESET_FOR(BoolExpr);
+	RESET_FOR(SubLink);
+	RESET_FOR(RelabelType);
+	RESET_FOR(CoerceViaIO);
+	RESET_FOR(ArrayCoerceExpr);
+	RESET_FOR(ConvertRowtypeExpr);
+	RESET_FOR(CollateExpr);
+	RESET_FOR(CaseWhen);
+	RESET_FOR(ArrayExpr);
+	RESET_FOR(RowExpr);
+	RESET_FOR(CoalesceExpr);
+	RESET_FOR(MinMaxExpr);
+	RESET_FOR(SQLValueFunction);
+	RESET_FOR(XmlExpr);
+	RESET_FOR(JsonFormat);
+	RESET_FOR(JsonConstructorExpr);
+	RESET_FOR(JsonIsPredicate);
+	RESET_FOR(NullTest);
+	RESET_FOR(BooleanTest);
+	RESET_FOR(CoerceToDomain);
+	RESET_FOR(CoerceToDomainValue);
+	RESET_FOR(SetToDefault);
+	case T_CaseExpr:
+	{
+		/*
+		 * The expression_tree_walker does not call us for CaseWhen nodes,
+		 * but instead directly wires us through to its inner Nodes.
+		 * To correctly reset the location fields, we manually iterate
+		 * to get those fields reset.
+		 */
+		CaseExpr *caseExpr = castNode(CaseExpr, node);
+		caseExpr->location = -1;
+		foreach(temp, caseExpr->args)
+		{
+			CaseWhen *when = lfirst_node(CaseWhen, temp);
+			when->location = -1;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	return expression_tree_walker(node, reset_querytext_references,
+								  (void *) context);
+#undef RESET_FOR
+}
+
 
 /*
  * Standard expression-tree walking support
diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c
index e36fc72e1e..57650935ac 100644
--- a/src/backend/rewrite/rewriteDefine.c
+++ b/src/backend/rewrite/rewriteDefine.c
@@ -474,6 +474,14 @@ DefineQueryRewrite(const char *rulename,
 	/* discard rule if it's null action and not INSTEAD; it's a no-op */
 	if (action != NIL || is_instead)
 	{
+		/*
+		 * Clear location info from the action and event_qual data. We don't
+		 * store the original query, so keeping track of this information is
+		 * meaningless and hinders serialization/compression efforts.
+		 */
+		reset_querytext_references(event_qual, NULL);
+		reset_querytext_references((Node *) action, NULL);
+
 		ruleId = InsertRule(rulename,
 							event_type,
 							event_relid,
diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h
index 20921b45b9..f513b2ba37 100644
--- a/src/include/nodes/nodeFuncs.h
+++ b/src/include/nodes/nodeFuncs.h
@@ -139,6 +139,8 @@ get_notclausearg(const void *notclause)
 extern bool check_functions_in_node(Node *node, check_function_callback checker,
 									void *context);
 
+extern bool reset_querytext_references(Node *node, void *context);
+
 /*
  * The following functions are usually passed walker or mutator callbacks
  * that are declared like "bool walker(Node *node, my_struct *context)"
-- 
2.40.1

