From 074619b0658d1160e7c2110b67288f47118063bb Mon Sep 17 00:00:00 2001
From: Sami Imseih <simseih@amazon.com>
Date: Mon, 26 May 2025 22:11:46 -0500
Subject: [PATCH v6 3/4] Fix Normalization for squashed query texts

62d712ec added the ability to squash constants from an
IN list/ArrayExpr for queryId computation purposes. However,
in certain cases, this broke normalization. For example,
"IN (1, 2, int4(1))" is normalized to "IN ($2 /*, ... */))",
which leaves an extra parenthesis at the end of the normalized string.

To correct this, the start and end boundaries of an expr_list are
now tracked by the various nodes used during parsing and are made
available to the ArrayExpr node for query jumbling. Having these
boundaries allows normalization to precisely identify the locations
in the query text that should be squashed.
---
 .../pg_stat_statements/expected/squashing.out | 44 +++++----
 .../pg_stat_statements/pg_stat_statements.c   | 76 ++++-----------
 contrib/pg_stat_statements/sql/squashing.sql  |  5 +
 src/backend/nodes/gen_node_support.pl         |  2 +-
 src/backend/nodes/queryjumblefuncs.c          | 84 +++++++++--------
 src/backend/parser/gram.y                     | 94 +++++++++++++------
 src/backend/parser/parse_expr.c               |  4 +
 src/include/nodes/parsenodes.h                |  4 +
 src/include/nodes/primnodes.h                 |  4 +
 9 files changed, 174 insertions(+), 143 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index 725238d3f5c..f3f212183a2 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -82,6 +82,24 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                  |     1
 (2 rows)
 
+-- built-in functions will be squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT WHERE 1 IN (1, 2, int4(1), int4(2));
+--
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT WHERE $1 IN ($2 /*, ... */)                 |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
 -- Multiple squashed intervals
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
@@ -246,7 +264,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_bigint WHERE data IN    +|     1
-         ($1 /*, ... */::bigint)                    | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -343,7 +361,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_cast WHERE data IN      +|     1
-         ($1 /*, ... */::int4::casttesttype)        | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -367,7 +385,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_jsonb WHERE data IN     +|     1
-         (($1 /*, ... */)::jsonb)                   | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -441,7 +459,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                                query                                | calls 
 --------------------------------------------------------------------+-------
  SELECT * FROM test_squash WHERE id IN                             +|     1
-         ($1 /*, ... */::oid)                                       | 
+         ($1 /*, ... */)                                            | 
  SELECT * FROM test_squash WHERE id IN ($1::oid, $2::oid::int::oid) |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                 |     1
 (3 rows)
@@ -522,7 +540,7 @@ SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
- SELECT ARRAY[$1 /*, ... */::oid]                   |     1
+ SELECT ARRAY[$1 /*, ... */]                        |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -546,9 +564,7 @@ SELECT ARRAY[
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
- SELECT ARRAY[                                     +|     1
-     ($1 /*, ... */)::jsonb                        +| 
- ]                                                  | 
+ SELECT ARRAY[$1 /*, ... */]                        |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -573,9 +589,7 @@ SELECT ARRAY[
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
- SELECT ARRAY[                                     +|     1
-         $1 /*, ... */::int4::casttesttype         +| 
- ]                                                  | 
+ SELECT ARRAY[$1 /*, ... */]                        |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -654,9 +668,7 @@ SELECT ARRAY[
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
- SELECT ARRAY[                                     +|     1
-         $1 /*, ... */::bigint                     +| 
- ]                                                  | 
+ SELECT ARRAY[$1 /*, ... */]                        |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -681,7 +693,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
- select where $1 IN ($2 /*, ... */::int)            |     1
+ select where $1 IN ($2 /*, ... */)                 |     1
  select where $1 IN ($2::int, $3::int::text)        |     1
 (3 rows)
 
@@ -705,6 +717,6 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
- select where $1 IN ($2 /*, ... */::int::text)      |     2
+ select where $1 IN ($2 /*, ... */)                 |     2
 (2 rows)
 
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index c58f34e9f30..8cadfa2ff21 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2817,7 +2817,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
-	bool		in_squashed = false;	/* in a run of squashed consts? */
 	int			num_constants_replaced = 0;
 
 	/*
@@ -2832,9 +2831,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	 * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
 	 * could refine that limit based on the max value of n for the current
 	 * query, but it hardly seems worth any extra effort to do so.
-	 *
-	 * Note this also gives enough room for the commented-out ", ..." list
-	 * syntax used by constant squashing.
 	 */
 	norm_query_buflen = query_len + jstate->clocations_count * 10;
 
@@ -2856,63 +2852,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		if (tok_len < 0)
 			continue;			/* ignore any duplicates */
 
+		/* Copy next chunk (what precedes the next constant) */
+		len_to_wrt = off - last_off;
+		len_to_wrt -= last_tok_len;
+		Assert(len_to_wrt >= 0);
+		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+		n_quer_loc += len_to_wrt;
+
 		/*
-		 * What to do next depends on whether we're squashing constant lists,
-		 * and whether we're already in a run of such constants.
+		 * And insert a param symbol in place of the constant token.
+		 *
+		 * However, if this is a squashable list, the symbol replaces the whole
+		 * list and is followed by a comment marking the squashed elements.
 		 */
-		if (!jstate->clocations[i].squashed)
-		{
-			/*
-			 * This location corresponds to a constant not to be squashed.
-			 * Print what comes before the constant ...
-			 */
-			len_to_wrt = off - last_off;
-			len_to_wrt -= last_tok_len;
-
-			Assert(len_to_wrt >= 0);
-
-			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-			n_quer_loc += len_to_wrt;
-
-			/* ... and then a param symbol replacing the constant itself */
-			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-								  num_constants_replaced++ + 1 + jstate->highest_extern_param_id);
-
-			/* In case previous constants were merged away, stop doing that */
-			in_squashed = false;
-		}
-		else if (!in_squashed)
-		{
-			/*
-			 * This location is the start position of a run of constants to be
-			 * squashed, so we need to print the representation of starting a
-			 * group of stashed constants.
-			 *
-			 * Print what comes before the constant ...
-			 */
-			len_to_wrt = off - last_off;
-			len_to_wrt -= last_tok_len;
-			Assert(len_to_wrt >= 0);
-			Assert(i + 1 < jstate->clocations_count);
-			Assert(jstate->clocations[i + 1].squashed);
-			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-			n_quer_loc += len_to_wrt;
-
-			/* ... and then start a run of squashed constants */
-			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
-								  num_constants_replaced++ + 1 + jstate->highest_extern_param_id);
-
-			/* The next location will match the block below, to end the run */
-			in_squashed = true;
-		}
-		else
-		{
-			/*
-			 * The second location of a run of squashable elements; this
-			 * indicates its end.
-			 */
-			in_squashed = false;
-		}
+		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
+							  num_constants_replaced++ + 1 + jstate->highest_extern_param_id,
+							  (jstate->clocations[i].squashed) ? " /*, ... */" : "");
 
 		/* Otherwise the constant is squashed away -- move forward */
 		quer_loc = off + tok_len;
@@ -3005,6 +2960,9 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 
 		Assert(loc >= 0);
 
+		if (locs[i].squashed)
+			continue;			/* squashable list, ignore */
+
 		if (loc <= last_loc)
 			continue;			/* Duplicate constant, ignore */
 
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 0aaa893eb1a..aed4e42286c 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -26,6 +26,11 @@ SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data =
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- built-in functions will be squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT WHERE 1 IN (1, 2, int4(1), int4(2));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 -- Multiple squashed intervals
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..17ba3696226 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1324,7 +1324,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			# Node type.  Squash constants if requested.
 			if ($query_jumble_squash)
 			{
-				print $jff "\tJUMBLE_ELEMENTS($f);\n"
+				print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
 				  unless $query_jumble_ignore;
 			}
 			else
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..219023b1173 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -60,10 +60,10 @@ static uint64 DoJumble(JumbleState *jstate, Node *node);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
-static void RecordConstLocation(JumbleState *jstate,
-								int location, bool squashed);
+static void RecordExpressionLocation(JumbleState *jstate,
+									 int location, int len);
 static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -381,7 +381,7 @@ FlushPendingNulls(JumbleState *jstate)
  * element contributes nothing to the jumble hash.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, bool squashed)
+RecordExpressionLocation(JumbleState *jstate, int location, int len)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -396,9 +396,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
 						 sizeof(LocationLen));
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
-		/* initialize lengths to -1 to simplify third-party module usage */
-		jstate->clocations[jstate->clocations_count].squashed = squashed;
-		jstate->clocations[jstate->clocations_count].length = -1;
+
+		/*
+		 * Lengths default to -1 to simplify third-party module usage.
+		 *
+		 * A caller-supplied length greater than -1 indicates a squashable
+		 * list; it is stored as given and the location is marked squashed.
+		 */
+		jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1;
+		jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false;
 		jstate->clocations_count++;
 	}
 }
@@ -413,7 +419,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
  * - Otherwise test if the expression is a simple Const.
  */
 static bool
-IsSquashableConst(Node *element)
+IsSquashableExpression(Node *element)
 {
 	if (IsA(element, RelabelType))
 		element = (Node *) ((RelabelType *) element)->arg;
@@ -450,6 +456,7 @@ IsSquashableConst(Node *element)
 	return true;
 }
 
+
 /*
  * Subroutine for _jumbleElements: Verify whether the provided list
  * can be squashed, meaning it contains only constant expressions.
@@ -461,7 +468,7 @@ IsSquashableConst(Node *element)
  * expressions.
  */
 static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableExpressionList(List *elements)
 {
 	ListCell   *temp;
 
@@ -474,22 +481,19 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
 
 	foreach(temp, elements)
 	{
-		if (!IsSquashableConst(lfirst(temp)))
+		if (!IsSquashableExpression(lfirst(temp)))
 			return false;
 	}
 
-	*firstExpr = linitial(elements);
-	*lastExpr = llast(elements);
-
 	return true;
 }
 
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_ELEMENTS(list) \
-	_jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_ELEMENTS(list, node) \
+	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location, false)
+	RecordExpressionLocation(jstate, expr->location, -1)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -517,36 +521,36 @@ do { \
 #include "queryjumblefuncs.funcs.c"
 
 /*
- * We jumble lists of constant elements as one individual item regardless
- * of how many elements are in the list.  This means different queries
- * jumble to the same query_id, if the only difference is the number of
- * elements in the list.
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is known as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants to be squashed. To normalize such queries, we use
+ * the start and end locations of the list of elements.
  */
 static void
-_jumbleElements(JumbleState *jstate, List *elements)
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
 {
-	Node	   *first,
-			   *last;
+	bool		normalize_list = false;
 
-	if (IsSquashableConstList(elements, &first, &last))
+	if (IsSquashableExpressionList(elements))
 	{
-		/*
-		 * If this list of elements is squashable, keep track of the location
-		 * of its first and last elements.  When reading back the locations
-		 * array, we'll see two consecutive locations with ->squashed set to
-		 * true, indicating the location of initial and final elements of this
-		 * list.
-		 *
-		 * For the limited set of cases we support now (implicit coerce via
-		 * FuncExpr, Const) it's fine to use exprLocation of the 'last'
-		 * expression, but if more complex composite expressions are to be
-		 * supported (e.g., OpExpr or FuncExpr as an explicit call), more
-		 * sophisticated tracking will be needed.
-		 */
-		RecordConstLocation(jstate, exprLocation(first), true);
-		RecordConstLocation(jstate, exprLocation(last), true);
+		if (IsA(node, ArrayExpr))
+		{
+			ArrayExpr  *aexpr = (ArrayExpr *) node;
+
+			if (aexpr->list_start > 0 && aexpr->list_end > 0)
+			{
+				RecordExpressionLocation(jstate,
+										 aexpr->list_start + 1,
+										 (aexpr->list_end - aexpr->list_start) - 1);
+				normalize_list = true;
+			}
+		}
 	}
-	else
+
+	if (!normalize_list)
 	{
 		_jumbleNode(jstate, (Node *) elements);
 	}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..0cd5f794db3 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -136,6 +136,17 @@ typedef struct KeyActions
 	KeyAction *deleteAction;
 } KeyActions;
 
+/*
+ * Track the start and end of a list in an expression, such as an 'IN' list
+ * or Array Expression
+ */
+typedef struct ListWithBoundary
+{
+	Node	   *expr;
+	ParseLoc	start;
+	ParseLoc	end;
+} ListWithBoundary;
+
 /* ConstraintAttributeSpec yields an integer bitmask of these flags: */
 #define CAS_NOT_DEFERRABLE			0x01
 #define CAS_DEFERRABLE				0x02
@@ -184,7 +195,7 @@ static void doNegateFloat(Float *v);
 static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(List *elements, int location, int end_location);
 static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
 								  int location);
 static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -269,6 +280,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	struct KeyAction *keyaction;
 	ReturningClause *retclause;
 	ReturningOptionKind retoptionkind;
+	struct ListWithBoundary *listwithboundary;
 }
 
 %type <node>	stmt toplevel_stmt schema_stmt routine_body_stmt
@@ -523,8 +535,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	def_elem reloption_elem old_aggr_elem operator_def_elem
 %type <node>	def_arg columnElem where_clause where_or_current_clause
 				a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
-				columnref in_expr having_clause func_table xmltable array_expr
+				columnref having_clause func_table xmltable array_expr
 				OptWhereClause operator_def_arg
+%type <listwithboundary> in_expr
 %type <list>	opt_column_and_period_list
 %type <list>	rowsfrom_item rowsfrom_list opt_col_def_list
 %type <boolean> opt_ordinality opt_without_overlaps
@@ -15289,46 +15302,58 @@ a_expr:		c_expr									{ $$ = $1; }
 				}
 			| a_expr IN_P in_expr
 				{
+					ListWithBoundary *n = $3;
+
 					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($3, SubLink))
+					if (IsA(n->expr, SubLink))
 					{
 						/* generate foo = ANY (subquery) */
-						SubLink	   *n = (SubLink *) $3;
-
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
-						$$ = (Node *) n;
+						SubLink	   *n2 = (SubLink *) n->expr;
+
+						n2->subLinkType = ANY_SUBLINK;
+						n2->subLinkId = 0;
+						n2->testexpr = $1;
+						n2->operName = NIL;		/* show it's IN not = ANY */
+						n2->location = @2;
+						$$ = (Node *) n2;
 					}
 					else
 					{
 						/* generate scalar IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
+						A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "=", $1, n->expr, @2);
+
+						n2->rexpr_list_start = $3->start;
+						n2->rexpr_list_end = $3->end;
+						$$ = (Node *) n2;
 					}
 				}
 			| a_expr NOT_LA IN_P in_expr						%prec NOT_LA
 				{
+					ListWithBoundary *n = $4;
+
 					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($4, SubLink))
+					if (IsA(n->expr, SubLink))
 					{
 						/* generate NOT (foo = ANY (subquery)) */
 						/* Make an = ANY node */
-						SubLink	   *n = (SubLink *) $4;
+						SubLink	   *n2 = (SubLink *) n->expr;
 
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
+						n2->subLinkType = ANY_SUBLINK;
+						n2->subLinkId = 0;
+						n2->testexpr = $1;
+						n2->operName = NIL;		/* show it's IN not = ANY */
+						n2->location = @2;
 						/* Stick a NOT on top; must have same parse location */
-						$$ = makeNotExpr((Node *) n, @2);
+						$$ = makeNotExpr((Node *) n2, @2);
 					}
 					else
 					{
 						/* generate scalar NOT IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
+						A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "<>", $1, n->expr, @2);
+
+						n2->rexpr_list_start = $4->start;
+						n2->rexpr_list_end = $4->end;
+						$$ = (Node *) n2;
 					}
 				}
 			| a_expr subquery_Op sub_type select_with_parens	%prec Op
@@ -16764,15 +16789,15 @@ type_list:	Typename								{ $$ = list_make1($1); }
 
 array_expr: '[' expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' array_expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' ']'
 				{
-					$$ = makeAArrayExpr(NIL, @1);
+					$$ = makeAArrayExpr(NIL, @1, @2);
 				}
 		;
 
@@ -16897,12 +16922,25 @@ trim_list:	a_expr FROM expr_list					{ $$ = lappend($3, $1); }
 in_expr:	select_with_parens
 				{
 					SubLink	   *n = makeNode(SubLink);
+					ListWithBoundary *n2 = palloc(sizeof(ListWithBoundary));
 
 					n->subselect = $1;
 					/* other fields will be filled later */
-					$$ = (Node *) n;
+
+					n2->expr = (Node *) n;
+					n2->start = -1;
+					n2->end = -1;
+					$$ = n2;
+				}
+			| '(' expr_list ')'
+				{
+					ListWithBoundary *n = palloc(sizeof(ListWithBoundary));
+
+					n->expr = (Node *) $2;
+					n->start = @1;
+					n->end = @3;
+					$$ = n;
 				}
-			| '(' expr_list ')'						{ $$ = (Node *) $2; }
 		;
 
 /*
@@ -19300,12 +19338,14 @@ makeNotExpr(Node *expr, int location)
 }
 
 static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(List *elements, int location, int location_end)
 {
 	A_ArrayExpr *n = makeNode(A_ArrayExpr);
 
 	n->elements = elements;
 	n->location = location;
+	n->list_start = location;
+	n->list_end = location_end;
 	return (Node *) n;
 }
 
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..7347c989e11 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1224,6 +1224,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 			newa->elements = aexprs;
 			newa->multidims = false;
 			newa->location = -1;
+			newa->list_start = a->rexpr_list_start;
+			newa->list_end = a->rexpr_list_end;
 
 			result = (Node *) make_scalar_array_op(pstate,
 												   a->name,
@@ -2166,6 +2168,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
 	newa->element_typeid = element_type;
 	newa->elements = newcoercedelems;
 	newa->location = a->location;
+	newa->list_start = a->list_start;
+	newa->list_end = a->list_end;
 
 	return (Node *) newa;
 }
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4610fc61293..2f078887d06 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -347,6 +347,8 @@ typedef struct A_Expr
 	Node	   *lexpr;			/* left argument, or NULL if none */
 	Node	   *rexpr;			/* right argument, or NULL if none */
 	ParseLoc	location;		/* token location, or -1 if unknown */
+	ParseLoc	rexpr_list_start;	/* location of the start of a rexpr list */
+	ParseLoc	rexpr_list_end; /* location of the end of a rexpr list */
 } A_Expr;
 
 /*
@@ -502,6 +504,8 @@ typedef struct A_ArrayExpr
 	NodeTag		type;
 	List	   *elements;		/* array element expressions */
 	ParseLoc	location;		/* token location, or -1 if unknown */
+	ParseLoc	list_start;		/* location of the start of the elements list */
+	ParseLoc	list_end;		/* location of the end of the elements list */
 } A_ArrayExpr;
 
 /*
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 7d3b4198f26..773cdd880aa 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1399,6 +1399,10 @@ typedef struct ArrayExpr
 	bool		multidims pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* location of the start of the elements list */
+	ParseLoc	list_start;
+	/* location of the end of the elements list */
+	ParseLoc	list_end;
 } ArrayExpr;
 
 /*
-- 
2.39.5 (Apple Git-154)

