Re: Cleaning up array_in()

jian he Sun, 10 Sep 2023 22:26:50 -0700

On Sun, Sep 10, 2023 at 6:00 PM Alexander Lakhin <[email protected]> wrote:
> Case 1:
> SELECT '{1,'::integer[];
> ERROR:  malformed array literal: "{1,"
> LINE 1: SELECT '{1,'::integer[];
>                 ^
> DETAIL:  Unexpected end of input.
>
> vs
>
> ERROR:  malformed array literal: "{1,"
> LINE 1: SELECT '{1,'::integer[];
>                 ^
>
> (no DETAIL)
>
> Case 2:
> SELECT '{{},}'::text[];
> ERROR:  malformed array literal: "{{},}"
> LINE 1: SELECT '{{},}'::text[];
>                 ^
> DETAIL:  Unexpected "}" character
>
> vs
>   text
> ------
>   {}
> (1 row)
>
> Case 3:
> select '{\{}'::text[];
>   text
> -------
>   {"{"}
> (1 row)
>
> vs
>   text
> ------
>   {""}
>
> Best regards,
> Alexander


hi.
Thanks for reviewing it.

> DETAIL:  Unexpected end of input.
An unexpected end of input can occur in many places, so I consolidated the handling of that error.

SELECT '{{},}'::text[];
Solved by tracking both the current and the previous token type.

select '{\{}'::text[];
Solved by updating dstendptr.

attached.

From d57060acd39a8460eb8ffa2cb448181e2ab24c53 Mon Sep 17 00:00:00 2001
From: Tom Lane <[email protected]>
Date: Tue, 4 Jul 2023 12:39:41 -0400
Subject: [PATCH v6 1/7] Simplify and speed up ReadArrayStr().

ReadArrayStr() seems to have been written on the assumption that
non-rectangular input is fine and it should pad with NULLs anywhere
that elements are missing.  We disallowed non-rectangular input
ages ago (commit 0e13d627b), but never simplified this function
as a follow-up.  In particular, the existing code recomputes each
element's linear location from scratch, which is quite unnecessary
for rectangular input: we can just assign the elements sequentially,
saving lots of arithmetic.  Add some more commentary while at it.

ArrayGetOffset0() is no longer used anywhere, so remove it.
---
 src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++----------------
 src/backend/utils/adt/arrayutils.c | 15 -------
 src/include/utils/array.h          |  1 -
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 7828a626..df1ebb47 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -93,7 +93,7 @@ typedef struct ArrayIteratorData
 static int	ArrayCount(const char *str, int *dim, char typdelim,
 					   Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
-						 int nitems, int ndim, int *dim,
+						 int nitems,
 						 FmgrInfo *inputproc, Oid typioparam, int32 typmod,
 						 char typdelim,
 						 int typlen, bool typbyval, char typalign,
@@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS)
 	dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
 	nullsPtr = (bool *) palloc(nitems * sizeof(bool));
 	if (!ReadArrayStr(p, string,
-					  nitems, ndim, dim,
+					  nitems,
 					  &my_extra->proc, typioparam, typmod,
 					  typdelim,
 					  typlen, typbyval, typalign,
@@ -436,7 +436,8 @@ array_in(PG_FUNCTION_ARGS)
 
 /*
  * ArrayCount
- *	 Determines the dimensions for an array string.
+ *	 Determines the dimensions for an array string.  This includes
+ *	 syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
  * returned in dim[], which must be of size MAXDIM.
@@ -683,16 +684,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 /*
  * ReadArrayStr :
  *	 parses the array string pointed to by "arrayStr" and converts the values
- *	 to internal format.  Unspecified elements are initialized to nulls.
- *	 The array dimensions must already have been determined.
+ *	 to internal format.  The array dimensions must have been determined,
+ *	 and the case of an empty array must have been handled earlier.
  *
  * Inputs:
  *	arrayStr: the string to parse.
  *			  CAUTION: the contents of "arrayStr" will be modified!
  *	origStr: the unmodified input string, used only in error messages.
  *	nitems: total number of array elements, as already determined.
- *	ndim: number of array dimensions
- *	dim[]: array axis lengths
  *	inputproc: type-specific input procedure for element datatype.
  *	typioparam, typmod: auxiliary values to pass to inputproc.
  *	typdelim: the value delimiter (type-specific).
@@ -717,8 +716,6 @@ static bool
 ReadArrayStr(char *arrayStr,
 			 const char *origStr,
 			 int nitems,
-			 int ndim,
-			 int *dim,
 			 FmgrInfo *inputproc,
 			 Oid typioparam,
 			 int32 typmod,
@@ -732,20 +729,13 @@ ReadArrayStr(char *arrayStr,
 			 int32 *nbytes,
 			 Node *escontext)
 {
-	int			i,
+	int			i = 0,
 				nest_level = 0;
 	char	   *srcptr;
 	bool		in_quotes = false;
 	bool		eoArray = false;
 	bool		hasnull;
 	int32		totbytes;
-	int			indx[MAXDIM] = {0},
-				prod[MAXDIM];
-
-	mda_get_prod(ndim, dim, prod);
-
-	/* Initialize is-null markers to true */
-	memset(nulls, true, nitems * sizeof(bool));
 
 	/*
 	 * We have to remove " and \ characters to create a clean item value to
@@ -768,11 +758,20 @@ ReadArrayStr(char *arrayStr,
 		bool		itemdone = false;
 		bool		leadingspace = true;
 		bool		hasquoting = false;
-		char	   *itemstart;
-		char	   *dstptr;
-		char	   *dstendptr;
+		char	   *itemstart;	/* start of de-escaped text */
+		char	   *dstptr;		/* next output point for de-escaped text */
+		char	   *dstendptr;	/* last significant output char + 1 */
 
-		i = -1;
+		/*
+		 * Parse next array element, collecting the de-escaped text into
+		 * itemstart..dstendptr-1.
+		 *
+		 * Notice that we do not set "itemdone" until we see a separator
+		 * (typdelim character) or the array's final right brace.  Since the
+		 * array is already verified to be nonempty and rectangular, there is
+		 * guaranteed to be another element to be processed in the first case,
+		 * while in the second case of course we'll exit the outer loop.
+		 */
 		itemstart = dstptr = dstendptr = srcptr;
 
 		while (!itemdone)
@@ -819,13 +818,7 @@ ReadArrayStr(char *arrayStr,
 				case '{':
 					if (!in_quotes)
 					{
-						if (nest_level >= ndim)
-							ereturn(escontext, false,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"",
-											origStr)));
 						nest_level++;
-						indx[nest_level - 1] = 0;
 						srcptr++;
 					}
 					else
@@ -839,14 +832,9 @@ ReadArrayStr(char *arrayStr,
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"",
 											origStr)));
-						if (i == -1)
-							i = ArrayGetOffset0(ndim, indx, prod);
-						indx[nest_level - 1] = 0;
 						nest_level--;
 						if (nest_level == 0)
 							eoArray = itemdone = true;
-						else
-							indx[nest_level - 1]++;
 						srcptr++;
 					}
 					else
@@ -857,10 +845,7 @@ ReadArrayStr(char *arrayStr,
 						*dstptr++ = *srcptr++;
 					else if (*srcptr == typdelim)
 					{
-						if (i == -1)
-							i = ArrayGetOffset0(ndim, indx, prod);
 						itemdone = true;
-						indx[ndim - 1]++;
 						srcptr++;
 					}
 					else if (scanner_isspace(*srcptr))
@@ -884,15 +869,18 @@ ReadArrayStr(char *arrayStr,
 			}
 		}
 
+		/* Terminate de-escaped string */
 		Assert(dstptr < srcptr);
 		*dstendptr = '\0';
 
-		if (i < 0 || i >= nitems)
+		/* Safety check that we don't write past the output arrays */
+		if (i >= nitems)
 			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"",
 							origStr)));
 
+		/* Convert the de-escaped string into the next output array entries */
 		if (Array_nulls && !hasquoting &&
 			pg_strcasecmp(itemstart, "NULL") == 0)
 		{
@@ -913,8 +901,17 @@ ReadArrayStr(char *arrayStr,
 				return false;
 			nulls[i] = false;
 		}
+
+		i++;
 	}
 
+	/* Cross-check that we filled all the output array entries */
+	if (i != nitems)
+		ereturn(escontext, false,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed array literal: \"%s\"",
+						origStr)));
+
 	/*
 	 * Check for nulls, compute total data space needed
 	 */
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
index 62152f79..b8bccc7c 100644
--- a/src/backend/utils/adt/arrayutils.c
+++ b/src/backend/utils/adt/arrayutils.c
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
 	return offset;
 }
 
-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-	int			i,
-				lin = 0;
-
-	for (i = 0; i < n; i++)
-		lin += tup[i] * scale[i];
-	return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index b13dfb34..d49adc38 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -448,7 +448,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */
 
 extern int	ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int	ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int	ArrayGetNItems(int ndim, const int *dims);
 extern int	ArrayGetNItemsSafe(int ndim, const int *dims,
 							   struct Node *escontext);
-- 
2.34.1

From 321dcba4c535c30e0b3fb1dc04a699a5f8dd0829 Mon Sep 17 00:00:00 2001
From: Tom Lane <[email protected]>
Date: Tue, 4 Jul 2023 14:03:40 -0400
Subject: [PATCH v6 2/7] Rewrite ArrayCount() to make dimensionality checks
 more complete.

ArrayCount has had a few layers of revision over the years,
reaching a point where it's really quite baroque and unintelligible.
There are actually four different arrays holding dimensions, which
can be reduced to two with little effort.  It was also close to
impossible to figure out why the dimension-counting actually worked
correctly.  Rewrite to make it perhaps a little less opaque.
In particular, a new element or subarray is now counted when we see
the start of the element or subarray, not at some later point where
we might not even be at the same nest_level anymore.

Also, install guards to catch non-rectangularity cases that were
previously missed, along the same lines as recent plperl and plpython
fixes: we were not checking that all scalar elements appear at the
same nesting depth.  Thanks to the very different logic, this seems
not to have caused any of the sort of internal errors that the PLs
suffered from, but it's still wrong.

On the other hand, the behavior of plperl and plpython suggests
that we should allow empty sub-arrays, as in '{{},{}}'; those
languages interpret equivalent constructs such as '[[],[]]' as
an empty (hence zero-dimensional) array, and it seems unclear
why array_in should not.  Remove the hack that was installed to
reject that case.

Also, fix things so that it's possible to write an empty array
with explicit bounds spec, along the lines of '[1:0]={}'.
Before, that was rejected with a complaint about upper bound
less than lower, but I don't see a good reason not to allow it.

In passing, remove ArrayParseState.ARRAY_ELEM_COMPLETED, a state
that the state machine never entered yet wasted cycles checking for.

(This patch looks a bit odd because I tried to avoid reindenting
existing code, despite having removed one level of loop.  That
seemed to me to make it easier to read the code changes.  A
follow-up pgindent run is needed.)
---
 src/backend/utils/adt/arrayfuncs.c   | 247 +++++++++++++++++----------
 src/test/regress/expected/arrays.out |  68 +++++++-
 src/test/regress/sql/arrays.sql      |  13 +-
 3 files changed, 233 insertions(+), 95 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index df1ebb47..59bb0614 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -58,7 +58,6 @@ typedef enum
 	ARRAY_NO_LEVEL,
 	ARRAY_LEVEL_STARTED,
 	ARRAY_ELEM_STARTED,
-	ARRAY_ELEM_COMPLETED,
 	ARRAY_QUOTED_ELEM_STARTED,
 	ARRAY_QUOTED_ELEM_COMPLETED,
 	ARRAY_ELEM_DELIMITED,
@@ -302,10 +301,18 @@ array_in(PG_FUNCTION_ARGS)
 		*q = '\0';
 		ub = atoi(p);
 		p = q + 1;
-		if (ub < lBound[ndim])
+
+		/* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
+		if (ub == INT_MAX)
+			ereturn(escontext, (Datum) 0,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("array upper bound is too large: %d",
+							ub)));
+		/* Now it's safe to compute ub + 1 */
+		if (ub + 1 < lBound[ndim])
 			ereturn(escontext, (Datum) 0,
 					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-					 errmsg("upper bound cannot be less than lower bound")));
+					 errmsg("upper bound cannot be less than lower bound minus one")));
 
 		dim[ndim] = ub - lBound[ndim] + 1;
 		ndim++;
@@ -440,7 +447,9 @@ array_in(PG_FUNCTION_ARGS)
  *	 syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * returned in dim[], which must be of size MAXDIM.  This function does not
+ * special-case empty arrays: it will return a positive result with some
+ * zero axis length(s).
  *
  * If we detect an error, fill *escontext with error details and return -1
  * (unless escontext isn't provided, in which case errors will be thrown).
@@ -451,31 +460,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 	int			nest_level = 0,
 				i;
 	int			ndim = 1,
-				temp[MAXDIM],
-				nelems[MAXDIM],
-				nelems_last[MAXDIM];
+				nelems[MAXDIM];
+	bool		ndim_frozen = false;
 	bool		in_quotes = false;
 	bool		eoArray = false;
-	bool		empty_array = true;
 	const char *ptr;
 	ArrayParseState parse_state = ARRAY_NO_LEVEL;
 
+	/* Initialize dim[] entries to -1 meaning "unknown" */
 	for (i = 0; i < MAXDIM; ++i)
-	{
-		temp[i] = dim[i] = nelems_last[i] = 0;
-		nelems[i] = 1;
-	}
+		dim[i] = -1;
 
+	/* Scan string until we reach closing brace */
 	ptr = str;
 	while (!eoArray)
 	{
-		bool		itemdone = false;
-
-		while (!itemdone)
-		{
-			if (parse_state == ARRAY_ELEM_STARTED ||
-				parse_state == ARRAY_QUOTED_ELEM_STARTED)
-				empty_array = false;
+		bool		new_element = false;
 
 			switch (*ptr)
 			{
@@ -492,17 +492,25 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 					 * start, or after an element delimiter. In any case we
 					 * now must be past an element start.
 					 */
-					if (parse_state != ARRAY_LEVEL_STARTED &&
-						parse_state != ARRAY_ELEM_STARTED &&
-						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-						parse_state != ARRAY_ELEM_DELIMITED)
+				switch (parse_state)
+				{
+					case ARRAY_LEVEL_STARTED:
+					case ARRAY_ELEM_DELIMITED:
+						/* start new unquoted element */
+						parse_state = ARRAY_ELEM_STARTED;
+						new_element = true;
+						break;
+					case ARRAY_ELEM_STARTED:
+					case ARRAY_QUOTED_ELEM_STARTED:
+						/* already in element */
+						break;
+					default:
 						ereturn(escontext, -1,
 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 								 errmsg("malformed array literal: \"%s\"", str),
 								 errdetail("Unexpected \"%c\" character.",
 										   '\\')));
-					if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-						parse_state = ARRAY_ELEM_STARTED;
+				}
 					/* skip the escaped character */
 					if (*(ptr + 1))
 						ptr++;
@@ -519,18 +527,28 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 					 * element start, or after an element delimiter. In any
 					 * case we now must be past an element start.
 					 */
-					if (parse_state != ARRAY_LEVEL_STARTED &&
-						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-						parse_state != ARRAY_ELEM_DELIMITED)
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"", str),
-								 errdetail("Unexpected array element.")));
-					in_quotes = !in_quotes;
-					if (in_quotes)
+				switch (parse_state)
+				{
+					case ARRAY_LEVEL_STARTED:
+					case ARRAY_ELEM_DELIMITED:
+						/* start new quoted element */
+						Assert(!in_quotes);
+						in_quotes = true;
 						parse_state = ARRAY_QUOTED_ELEM_STARTED;
-					else
+						new_element = true;
+						break;
+					case ARRAY_QUOTED_ELEM_STARTED:
+						/* already in element, end it */
+						Assert(in_quotes);
+						in_quotes = false;
 						parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+						break;
+					default:
+						ereturn(escontext, -1,
+								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+								 errmsg("malformed array literal: \"%s\"", str),
+								 errdetail("Unexpected array element.")));
+				}
 					break;
 				case '{':
 					if (!in_quotes)
@@ -538,27 +556,52 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 						/*
 						 * A left brace can occur if no nesting has occurred
 						 * yet, after a level start, or after a level
-						 * delimiter.
+						 * delimiter.  If we see one after an element
+						 * delimiter, we have something like "{1,{2}}", so
+						 * complain about non-rectangularity.
 						 */
-						if (parse_state != ARRAY_NO_LEVEL &&
-							parse_state != ARRAY_LEVEL_STARTED &&
-							parse_state != ARRAY_LEVEL_DELIMITED)
+					switch (parse_state)
+					{
+						case ARRAY_NO_LEVEL:
+						case ARRAY_LEVEL_STARTED:
+						case ARRAY_LEVEL_DELIMITED:
+							/* okay */
+							break;
+						case ARRAY_ELEM_DELIMITED:
+							ereturn(escontext, -1,
+									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+									 errmsg("malformed array literal: \"%s\"", str),
+									 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+						default:
 							ereturn(escontext, -1,
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"", str),
 									 errdetail("Unexpected \"%c\" character.",
 											   '{')));
+					}
 						parse_state = ARRAY_LEVEL_STARTED;
+					/* Nested sub-arrays count as elements of outer level */
+					if (nest_level > 0)
+						nelems[nest_level - 1]++;
+					/* Initialize element counting in the new level */
 						if (nest_level >= MAXDIM)
 							ereturn(escontext, -1,
 									(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 									 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
 											nest_level + 1, MAXDIM)));
-						temp[nest_level] = 0;
+					nelems[nest_level] = 0;
 						nest_level++;
 						if (ndim < nest_level)
+					{
+						/* Can't increase ndim once it's frozen */
+						if (ndim_frozen)
+							ereturn(escontext, -1,
+									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+									 errmsg("malformed array literal: \"%s\"", str),
+									 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
 							ndim = nest_level;
 					}
+				}
 					break;
 				case '}':
 					if (!in_quotes)
@@ -566,46 +609,56 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 						/*
 						 * A right brace can occur after an element start, an
 						 * element completion, a quoted element completion, or
-						 * a level completion.
+						 * a level completion.  We also allow it after a level
+						 * start, that is an empty sub-array "{}" --- but that
+						 * freezes the number of dimensions and all such
+						 * sub-arrays must be at the same level, just like
+						 * sub-arrays containing elements.
 						 */
-						if (parse_state != ARRAY_ELEM_STARTED &&
-							parse_state != ARRAY_ELEM_COMPLETED &&
-							parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-							parse_state != ARRAY_LEVEL_COMPLETED &&
-							!(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
+					switch (parse_state)
+					{
+						case ARRAY_ELEM_STARTED:
+						case ARRAY_QUOTED_ELEM_COMPLETED:
+						case ARRAY_LEVEL_COMPLETED:
+							/* okay */
+							break;
+						case ARRAY_LEVEL_STARTED:
+							/* empty sub-array: OK if at correct nest_level */
+							ndim_frozen = true;
+							if (nest_level != ndim)
+							ereturn(escontext, -1,
+									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+									 errmsg("malformed array literal: \"%s\"", str),
+										 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+							break;
+						default:
 							ereturn(escontext, -1,
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"", str),
 									 errdetail("Unexpected \"%c\" character.",
 											   '}')));
-						parse_state = ARRAY_LEVEL_COMPLETED;
-						if (nest_level == 0)
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Unmatched \"%c\" character.", '}')));
+					}
+					parse_state = ARRAY_LEVEL_COMPLETED;
+					/* The parse state check assured we're in a level. */
+					Assert(nest_level > 0);
 						nest_level--;
 
-						if (nelems_last[nest_level] != 0 &&
-							nelems[nest_level] != nelems_last[nest_level])
+					if (dim[nest_level] < 0)
+					{
+						/* Save length of first sub-array of this level */
+						dim[nest_level] = nelems[nest_level];
+					}
+					else if (nelems[nest_level] != dim[nest_level])
+					{
+						/* Subsequent sub-arrays must have same length */
 							ereturn(escontext, -1,
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Multidimensional arrays must have "
-											   "sub-arrays with matching "
-											   "dimensions.")));
-						nelems_last[nest_level] = nelems[nest_level];
-						nelems[nest_level] = 1;
-						if (nest_level == 0)
-							eoArray = itemdone = true;
-						else
-						{
-							/*
-							 * We don't set itemdone here; see comments in
-							 * ReadArrayStr
-							 */
-							temp[nest_level - 1]++;
+								 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
 						}
+					/* Done if this is the outermost level's '}' */
+					if (nest_level == 0)
+						eoArray = true;
 					}
 					break;
 				default:
@@ -614,12 +667,11 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 						if (*ptr == typdelim)
 						{
 							/*
-							 * Delimiters can occur after an element start, an
-							 * element completion, a quoted element
-							 * completion, or a level completion.
+							 * Delimiters can occur after an element start, a
+							 * quoted element completion, or a level
+							 * completion.
 							 */
 							if (parse_state != ARRAY_ELEM_STARTED &&
-								parse_state != ARRAY_ELEM_COMPLETED &&
 								parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
 								parse_state != ARRAY_LEVEL_COMPLETED)
 								ereturn(escontext, -1,
@@ -631,8 +683,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 								parse_state = ARRAY_LEVEL_DELIMITED;
 							else
 								parse_state = ARRAY_ELEM_DELIMITED;
-							itemdone = true;
-							nelems[nest_level - 1]++;
 						}
 						else if (!scanner_isspace(*ptr))
 						{
@@ -641,23 +691,51 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 							 * level start, after an element start, or after
 							 * an element delimiter. In any case we now must
 							 * be past an element start.
+							 *
+							 * If it's a space character, we can ignore it; it
+							 * might be data or not, but it doesn't change the
+							 * parsing state.
 							 */
-							if (parse_state != ARRAY_LEVEL_STARTED &&
-								parse_state != ARRAY_ELEM_STARTED &&
-								parse_state != ARRAY_ELEM_DELIMITED)
+						switch (parse_state)
+						{
+							case ARRAY_LEVEL_STARTED:
+							case ARRAY_ELEM_DELIMITED:
+								/* start new unquoted element */
+								parse_state = ARRAY_ELEM_STARTED;
+								new_element = true;
+								break;
+							case ARRAY_ELEM_STARTED:
+								/* already in element */
+								break;
+							default:
 								ereturn(escontext, -1,
 										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 										 errmsg("malformed array literal: \"%s\"", str),
 										 errdetail("Unexpected array element.")));
-							parse_state = ARRAY_ELEM_STARTED;
+						}
 						}
 					}
 					break;
 			}
-			if (!itemdone)
-				ptr++;
+
+		/* To reduce duplication, all new-element cases go through here. */
+		if (new_element)
+		{
+			/*
+			 * Once we have found an element, the number of dimensions can no
+			 * longer increase, and subsequent elements must all be at the
+			 * same nesting depth.
+			 */
+			ndim_frozen = true;
+			if (nest_level != ndim)
+				ereturn(escontext, -1,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"", str),
+						 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+			/* Count the new element */
+			nelems[nest_level - 1]++;
 		}
-		temp[ndim - 1]++;
+
 		ptr++;
 	}
 
@@ -671,13 +749,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 					 errdetail("Junk after closing right brace.")));
 	}
 
-	/* special case for an empty array */
-	if (empty_array)
-		return 0;
-
-	for (i = 0; i < ndim; ++i)
-		dim[i] = temp[i];
-
 	return ndim;
 }
 
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 70643914..26d4281d 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1475,12 +1475,7 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
@@ -1501,6 +1496,49 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound minus one
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[1:0]={1}'::int[];
+ERROR:  malformed array literal: "[1:0]={1}"
+LINE 1: select '[1:0]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
@@ -1514,6 +1552,12 @@ select '{}'::text[];
  {}
 (1 row)
 
+select '{{},{}}'::text[];
+ text 
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text                      
 -----------------------------------------------
@@ -1559,6 +1603,18 @@ select '[0:1]={1.1,2.2}'::float8[];
  [0:1]={1.1,2.2}
 (1 row)
 
+select '[2147483646:2147483646]={1}'::int[];
+            int4             
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[1:0]={}'::int[];
+ int4 
+------
+ {}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index f1375621..b7e2b180 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -454,16 +454,25 @@ select 'foo' ilike all (array['F%', '%O']); -- t
 
 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
 select '{{"1 2" x},{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[1:0]={1}'::int[];
 select array[];
 -- none of the above should be accepted
 
 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
@@ -474,6 +483,8 @@ select '{
          }'::interval[];
 select array[]::text[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[1:0]={}'::int[];
 -- all of the above should be accepted
 
 -- tests for array aggregates
-- 
2.34.1

From c797c13e4736d37a5ae50b91bc4e6dfbab83ef1b Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <[email protected]>
Date: Sat, 8 Jul 2023 20:37:15 +0300
Subject: [PATCH v6 4/7] Extract loop to read array dimensions to subroutine.

For clarity.

One functional change: this changes from atoi() to strtol() for
parsing the dimensions. We now check the result of strtol(). That
means that bogus dimensions like

    select '[1+-:2]={foo,bar}'::text[];

are no longer accepted.
---
 src/backend/utils/adt/arrayfuncs.c | 229 +++++++++++++++++++----------
 1 file changed, 152 insertions(+), 77 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d1b5d48b..7a63db09 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -89,6 +89,9 @@ typedef struct ArrayIteratorData
 	int			current_item;	/* the item # we're at in the array */
 }			ArrayIteratorData;
 
+static bool ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext);
+static bool ReadArrayDimensions(char **srcptr, int *ndim, int *dim, int *lBound,
+								const char *origStr, Node *escontext);
 static int	ArrayCount(const char *str, int *dim, char typdelim,
 					   Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
@@ -238,85 +241,10 @@ array_in(PG_FUNCTION_ARGS)
 	 * If the input string starts with dimension info, read and use that.
 	 * Otherwise, we require the input to be in curly-brace style, and we
 	 * prescan the input to determine dimensions.
-	 *
-	 * Dimension info takes the form of one or more [n] or [m:n] items. The
-	 * outer loop iterates once per dimension item.
 	 */
 	p = string_save;
-	ndim = 0;
-	for (;;)
-	{
-		char	   *q;
-		int			ub;
-
-		/*
-		 * Note: we currently allow whitespace between, but not within,
-		 * dimension items.
-		 */
-		while (scanner_isspace(*p))
-			p++;
-		if (*p != '[')
-			break;				/* no more dimension items */
-		p++;
-		if (ndim >= MAXDIM)
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-					 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-							ndim + 1, MAXDIM)));
-
-		for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-			 /* skip */ ;
-		if (q == p)				/* no digits? */
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("malformed array literal: \"%s\"", string),
-					 errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
-
-		if (*q == ':')
-		{
-			/* [m:n] format */
-			*q = '\0';
-			lBound[ndim] = atoi(p);
-			p = q + 1;
-			for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-				 /* skip */ ;
-			if (q == p)			/* no digits? */
-				ereturn(escontext, (Datum) 0,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"", string),
-						 errdetail("Missing array dimension value.")));
-		}
-		else
-		{
-			/* [n] format */
-			lBound[ndim] = 1;
-		}
-		if (*q != ']')
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("malformed array literal: \"%s\"", string),
-					 errdetail("Missing \"%s\" after array dimensions.",
-							   "]")));
-
-		*q = '\0';
-		ub = atoi(p);
-		p = q + 1;
-
-		/* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
-		if (ub == INT_MAX)
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-					 errmsg("array upper bound is too large: %d",
-							ub)));
-		/* Now it's safe to compute ub + 1 */
-		if (ub + 1 < lBound[ndim])
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-					 errmsg("upper bound cannot be less than lower bound minus one")));
-
-		dim[ndim] = ub - lBound[ndim] + 1;
-		ndim++;
-	}
+	if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext))
+		return (Datum) 0;
 
 	if (ndim == 0)
 	{
@@ -441,6 +369,153 @@ array_in(PG_FUNCTION_ARGS)
 	PG_RETURN_ARRAYTYPE_P(retval);
 }
 
+
+/*
+ * ReadArrayDimensions
+ *	 parses the array dimensions part from "src" and converts the values
+ *	 to internal format.
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced to point
+ * after the dimension info.
+ *
+ * *ndim_p, *dim, and *lBound are output variables. They are filled with the
+ * number of dimensions (<= MAXDIM), the length of each dimension, and the
+ * lower bounds of the slices, respectively. If there is no dimension
+ * information, *ndim_p is set to 0.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
+ *
+ * Result:
+ *	true for success, false for failure (if escontext is provided).
+ *
+ * Note that dim[] and lBound[] are allocated by the caller, and must have
+ * MAXDIM elements.
+ */
+static bool
+ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
+					const char *origStr, Node *escontext)
+{
+	char	   *p = *srcptr;
+	int			ndim;
+
+	/*
+	 * Dimension info takes the form of one or more [n] or [m:n] items. The
+	 * loop iterates once per dimension item.
+	 */
+	ndim = 0;
+	for (;;)
+	{
+		char	   *q;
+		int			ub;
+		int			i;
+
+		/*
+		 * Note: we currently allow whitespace between, but not within,
+		 * dimension items.
+		 */
+		while (scanner_isspace(*p))
+			p++;
+		if (*p != '[')
+			break;				/* no more dimension items */
+		p++;
+		if (ndim >= MAXDIM)
+			ereturn(escontext, false,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+							ndim + 1, MAXDIM)));
+
+		q = p;
+		if (!ReadDimensionInt(&p, &i, origStr, escontext))
+			return false;
+		if (p == q)				/* no digits? */
+			ereturn(escontext, false,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed array literal: \"%s\"", origStr),
+					 errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
+
+		if (*p == ':')
+		{
+			/* [m:n] format */
+			lBound[ndim] = i;
+			p++;
+			q = p;
+			if (!ReadDimensionInt(&p, &ub, origStr, escontext))
+				return false;
+			if (p == q)			/* no digits? */
+				ereturn(escontext, false,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"", origStr),
+						 errdetail("Missing array dimension value.")));
+		}
+		else
+		{
+			/* [n] format */
+			lBound[ndim] = 1;
+			ub = i;
+		}
+		if (*p != ']')
+			ereturn(escontext, false,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed array literal: \"%s\"", origStr),
+					 errdetail("Missing \"%s\" after array dimensions.",
+							   "]")));
+		p++;
+
+		/* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
+		if (ub == INT_MAX)
+			ereturn(escontext, false,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("array upper bound is too large: %d", ub)));
+		/* Now it's safe to compute ub + 1 */
+		if (ub + 1 < lBound[ndim])
+			ereturn(escontext, false,
+					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+					 errmsg("upper bound cannot be less than lower bound minus one")));
+
+		dim[ndim] = ub - lBound[ndim] + 1;
+		ndim++;
+	}
+
+	*srcptr = p;
+	*ndim_p = ndim;
+	return true;
+}
+
+/*
+ * ReadDimensionInt
+ *	 parse an integer, for the array dimensions
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced past the
+ * digits of the integer. If there are no digits, returns true and leaves
+ * *srcptr unchanged.
+ *
+ * Result:
+ *	true for success, false for failure (if escontext is provided).
+ *	On success, the parsed integer is returned in *result.
+ */
+static bool
+ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext)
+{
+	char	   *p = *srcptr;
+
+	/* don't accept leading whitespace */
+	if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
+	{
+		*result = 0;
+		return true;			/* leave 'src' unmodified */
+	}
+
+	errno = 0;
+	*result = strtol(p, srcptr, 10);
+	if (errno == ERANGE)
+		ereturn(escontext, false,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("array bound is out of range")));
+	return true;
+}
+
 /*
  * ArrayCount
  *	 Determines the dimensions for an array string.  This includes
-- 
2.34.1

From 068eab0780f73926fdea0da1d229346ff4983d97 Mon Sep 17 00:00:00 2001
From: Tom Lane <[email protected]>
Date: Tue, 4 Jul 2023 14:07:31 -0400
Subject: [PATCH v6 3/7] Re-indent ArrayCount().

This cleans up after the removal of the "while (!itemdone)" loop.
---
 src/backend/utils/adt/arrayfuncs.c | 214 ++++++++++++++---------------
 1 file changed, 106 insertions(+), 108 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 59bb0614..d1b5d48b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -477,21 +477,21 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 	{
 		bool		new_element = false;
 
-			switch (*ptr)
-			{
-				case '\0':
-					/* Signal a premature end of the string */
-					ereturn(escontext, -1,
-							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-							 errmsg("malformed array literal: \"%s\"", str),
-							 errdetail("Unexpected end of input.")));
-				case '\\':
+		switch (*ptr)
+		{
+			case '\0':
+				/* Signal a premature end of the string */
+				ereturn(escontext, -1,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"", str),
+						 errdetail("Unexpected end of input.")));
+			case '\\':
 
-					/*
-					 * An escape must be after a level start, after an element
-					 * start, or after an element delimiter. In any case we
-					 * now must be past an element start.
-					 */
+				/*
+				 * An escape must be after a level start, after an element
+				 * start, or after an element delimiter. In any case we now
+				 * must be past an element start.
+				 */
 				switch (parse_state)
 				{
 					case ARRAY_LEVEL_STARTED:
@@ -511,22 +511,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 								 errdetail("Unexpected \"%c\" character.",
 										   '\\')));
 				}
-					/* skip the escaped character */
-					if (*(ptr + 1))
-						ptr++;
-					else
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"", str),
-								 errdetail("Unexpected end of input.")));
-					break;
-				case '"':
+				/* skip the escaped character */
+				if (*(ptr + 1))
+					ptr++;
+				else
+					ereturn(escontext, -1,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"", str),
+							 errdetail("Unexpected end of input.")));
+				break;
+			case '"':
 
-					/*
-					 * A quote must be after a level start, after a quoted
-					 * element start, or after an element delimiter. In any
-					 * case we now must be past an element start.
-					 */
+				/*
+				 * A quote must be after a level start, after a quoted element
+				 * start, or after an element delimiter. In any case we now
+				 * must be past an element start.
+				 */
 				switch (parse_state)
 				{
 					case ARRAY_LEVEL_STARTED:
@@ -549,17 +549,16 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 								 errmsg("malformed array literal: \"%s\"", str),
 								 errdetail("Unexpected array element.")));
 				}
-					break;
-				case '{':
-					if (!in_quotes)
-					{
-						/*
-						 * A left brace can occur if no nesting has occurred
-						 * yet, after a level start, or after a level
-						 * delimiter.  If we see one after an element
-						 * delimiter, we have something like "{1,{2}}", so
-						 * complain about non-rectangularity.
-						 */
+				break;
+			case '{':
+				if (!in_quotes)
+				{
+					/*
+					 * A left brace can occur if no nesting has occurred yet,
+					 * after a level start, or after a level delimiter.  If we
+					 * see one after an element delimiter, we have something
+					 * like "{1,{2}}", so complain about non-rectangularity.
+					 */
 					switch (parse_state)
 					{
 						case ARRAY_NO_LEVEL:
@@ -579,19 +578,19 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 									 errdetail("Unexpected \"%c\" character.",
 											   '{')));
 					}
-						parse_state = ARRAY_LEVEL_STARTED;
+					parse_state = ARRAY_LEVEL_STARTED;
 					/* Nested sub-arrays count as elements of outer level */
 					if (nest_level > 0)
 						nelems[nest_level - 1]++;
 					/* Initialize element counting in the new level */
-						if (nest_level >= MAXDIM)
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-									 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-											nest_level + 1, MAXDIM)));
+					if (nest_level >= MAXDIM)
+						ereturn(escontext, -1,
+								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+								 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+										nest_level + 1, MAXDIM)));
 					nelems[nest_level] = 0;
-						nest_level++;
-						if (ndim < nest_level)
+					nest_level++;
+					if (ndim < nest_level)
 					{
 						/* Can't increase ndim once it's frozen */
 						if (ndim_frozen)
@@ -599,22 +598,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"", str),
 									 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-							ndim = nest_level;
+						ndim = nest_level;
 					}
 				}
-					break;
-				case '}':
-					if (!in_quotes)
-					{
-						/*
-						 * A right brace can occur after an element start, an
-						 * element completion, a quoted element completion, or
-						 * a level completion.  We also allow it after a level
-						 * start, that is an empty sub-array "{}" --- but that
-						 * freezes the number of dimensions and all such
-						 * sub-arrays must be at the same level, just like
-						 * sub-arrays containing elements.
-						 */
+				break;
+			case '}':
+				if (!in_quotes)
+				{
+					/*
+					 * A right brace can occur after an element start, an
+					 * element completion, a quoted element completion, or a
+					 * level completion.  We also allow it after a level
+					 * start, that is an empty sub-array "{}" --- but that
+					 * freezes the number of dimensions and all such
+					 * sub-arrays must be at the same level, just like
+					 * sub-arrays containing elements.
+					 */
 					switch (parse_state)
 					{
 						case ARRAY_ELEM_STARTED:
@@ -626,9 +625,9 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 							/* empty sub-array: OK if at correct nest_level */
 							ndim_frozen = true;
 							if (nest_level != ndim)
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
+								ereturn(escontext, -1,
+										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+										 errmsg("malformed array literal: \"%s\"", str),
 										 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
 							break;
 						default:
@@ -641,7 +640,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 					parse_state = ARRAY_LEVEL_COMPLETED;
 					/* The parse state check assured we're in a level. */
 					Assert(nest_level > 0);
-						nest_level--;
+					nest_level--;
 
 					if (dim[nest_level] < 0)
 					{
@@ -651,51 +650,50 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 					else if (nelems[nest_level] != dim[nest_level])
 					{
 						/* Subsequent sub-arrays must have same length */
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
+						ereturn(escontext, -1,
+								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+								 errmsg("malformed array literal: \"%s\"", str),
 								 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-						}
+					}
 					/* Done if this is the outermost level's '}' */
 					if (nest_level == 0)
 						eoArray = true;
+				}
+				break;
+			default:
+				if (!in_quotes)
+				{
+					if (*ptr == typdelim)
+					{
+						/*
+						 * Delimiters can occur after an element start, a
+						 * quoted element completion, or a level completion.
+						 */
+						if (parse_state != ARRAY_ELEM_STARTED &&
+							parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+							parse_state != ARRAY_LEVEL_COMPLETED)
+							ereturn(escontext, -1,
+									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+									 errmsg("malformed array literal: \"%s\"", str),
+									 errdetail("Unexpected \"%c\" character.",
+											   typdelim)));
+						if (parse_state == ARRAY_LEVEL_COMPLETED)
+							parse_state = ARRAY_LEVEL_DELIMITED;
+						else
+							parse_state = ARRAY_ELEM_DELIMITED;
 					}
-					break;
-				default:
-					if (!in_quotes)
+					else if (!scanner_isspace(*ptr))
 					{
-						if (*ptr == typdelim)
-						{
-							/*
-							 * Delimiters can occur after an element start, a
-							 * quoted element completion, or a level
-							 * completion.
-							 */
-							if (parse_state != ARRAY_ELEM_STARTED &&
-								parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-								parse_state != ARRAY_LEVEL_COMPLETED)
-								ereturn(escontext, -1,
-										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-										 errmsg("malformed array literal: \"%s\"", str),
-										 errdetail("Unexpected \"%c\" character.",
-												   typdelim)));
-							if (parse_state == ARRAY_LEVEL_COMPLETED)
-								parse_state = ARRAY_LEVEL_DELIMITED;
-							else
-								parse_state = ARRAY_ELEM_DELIMITED;
-						}
-						else if (!scanner_isspace(*ptr))
-						{
-							/*
-							 * Other non-space characters must be after a
-							 * level start, after an element start, or after
-							 * an element delimiter. In any case we now must
-							 * be past an element start.
-							 *
-							 * If it's a space character, we can ignore it; it
-							 * might be data or not, but it doesn't change the
-							 * parsing state.
-							 */
+						/*
+						 * Other non-space characters must be after a level
+						 * start, after an element start, or after an element
+						 * delimiter. In any case we now must be past an
+						 * element start.
+						 *
+						 * If it's a space character, we can ignore it; it
+						 * might be data or not, but it doesn't change the
+						 * parsing state.
+						 */
 						switch (parse_state)
 						{
 							case ARRAY_LEVEL_STARTED:
@@ -713,10 +711,10 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 										 errmsg("malformed array literal: \"%s\"", str),
 										 errdetail("Unexpected array element.")));
 						}
-						}
 					}
-					break;
-			}
+				}
+				break;
+		}
 
 		/* To reduce duplication, all new-element cases go through here. */
 		if (new_element)
-- 
2.34.1

From 2c31568a105fa99460d09e5a01bb2ac6afa0f87d Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <[email protected]>
Date: Sat, 8 Jul 2023 22:19:48 +0300
Subject: [PATCH v6 5/7] Determine array dimensions and parse the elements in
 one pass.

The logic in ArrayCount() and ReadArrayStr() was hard to follow,
because they mixed low-level escaping and quoting with tracking the
curly braces. Introduce a ReadArrayToken() function that tokenizes the
input, handling quoting and escaping.

Instead of having separate ArrayCount() and ReadArrayStr() functions, move
the logic for determining and checking the structure of the dimensions
to ReadArrayStr(). ReadArrayStr() now determines the dimensions and
parses the elements in one pass.

ReadArrayStr() no longer scribbles on the input. Instead, it allocates
a separate scratch buffer that it uses to hold the string representation
of each element. The scratch buffer must be as large as the input string,
so this doesn't save memory, but palloc(strlen(x)) is cheaper than
pstrdup(x).
---
 src/backend/utils/adt/arrayfuncs.c   | 1044 +++++++++++---------------
 src/test/regress/expected/arrays.out |    2 +-
 src/tools/pgindent/typedefs.list     |    2 +-
 3 files changed, 440 insertions(+), 608 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 7a63db09..19d7af6d 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -53,18 +53,6 @@ bool		Array_nulls = true;
 			PG_FREE_IF_COPY(array, n); \
 	} while (0)
 
-typedef enum
-{
-	ARRAY_NO_LEVEL,
-	ARRAY_LEVEL_STARTED,
-	ARRAY_ELEM_STARTED,
-	ARRAY_QUOTED_ELEM_STARTED,
-	ARRAY_QUOTED_ELEM_COMPLETED,
-	ARRAY_ELEM_DELIMITED,
-	ARRAY_LEVEL_COMPLETED,
-	ARRAY_LEVEL_DELIMITED
-} ArrayParseState;
-
 /* Working state for array_iterate() */
 typedef struct ArrayIteratorData
 {
@@ -89,18 +77,30 @@ typedef struct ArrayIteratorData
 	int			current_item;	/* the item # we're at in the array */
 }			ArrayIteratorData;
 
+/* ReadArrayToken return type */
+typedef enum
+{
+	ATOK_LEVEL_START,
+	ATOK_LEVEL_END,
+	ATOK_DELIM,
+	ATOK_ELEM,
+	ATOK_ELEM_NULL,
+	ATOK_ERROR,
+} ArrayToken;
+
 static bool ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext);
 static bool ReadArrayDimensions(char **srcptr, int *ndim, int *dim, int *lBound,
 								const char *origStr, Node *escontext);
-static int	ArrayCount(const char *str, int *dim, char typdelim,
-					   Node *escontext);
-static bool ReadArrayStr(char *arrayStr, const char *origStr,
-						 int nitems,
+static bool ReadArrayStr(char **srcptr,
 						 FmgrInfo *inputproc, Oid typioparam, int32 typmod,
 						 char typdelim,
 						 int typlen, bool typbyval, char typalign,
-						 Datum *values, bool *nulls,
-						 bool *hasnulls, int32 *nbytes, Node *escontext);
+						 int *ndim, int *dim,
+						 int *nitems,
+						 Datum **values, bool **nulls,
+						 const char *origStr, Node *escontext);
+static ArrayToken ReadArrayToken(char **srcptr, char *elembuf, char typdelim,
+								 const char *origStr, Node *escontext);
 static void ReadArrayBinary(StringInfo buf, int nitems,
 							FmgrInfo *receiveproc, Oid typioparam, int32 typmod,
 							int typlen, bool typbyval, char typalign,
@@ -186,12 +186,11 @@ array_in(PG_FUNCTION_ARGS)
 	char		typalign;
 	char		typdelim;
 	Oid			typioparam;
-	char	   *string_save,
-			   *p;
-	int			i,
-				nitems;
-	Datum	   *dataPtr;
-	bool	   *nullsPtr;
+	char	   *p;
+	int			nitems;
+	int			nitems_according_to_dims;
+	Datum	   *values;
+	bool	   *nulls;
 	bool		hasnulls;
 	int32		nbytes;
 	int32		dataoffset;
@@ -234,15 +233,13 @@ array_in(PG_FUNCTION_ARGS)
 	typdelim = my_extra->typdelim;
 	typioparam = my_extra->typioparam;
 
-	/* Make a modifiable copy of the input */
-	string_save = pstrdup(string);
-
 	/*
+	 * Start processing the input string.
+	 *
 	 * If the input string starts with dimension info, read and use that.
-	 * Otherwise, we require the input to be in curly-brace style, and we
-	 * prescan the input to determine dimensions.
+	 * Otherwise, we determine the dimensions from the curly braces.
 	 */
-	p = string_save;
+	p = string;
 	if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext))
 		return (Datum) 0;
 
@@ -254,17 +251,11 @@ array_in(PG_FUNCTION_ARGS)
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"", string),
 					 errdetail("Array value must start with \"{\" or dimension information.")));
-		ndim = ArrayCount(p, dim, typdelim, escontext);
-		if (ndim < 0)
-			PG_RETURN_NULL();
-		for (i = 0; i < ndim; i++)
+		for (int i = 0; i < MAXDIM; i++)
 			lBound[i] = 1;
 	}
 	else
 	{
-		int			ndim_braces,
-					dim_braces[MAXDIM];
-
 		/* If array dimensions are given, expect '=' operator */
 		if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
 			ereturn(escontext, (Datum) 0,
@@ -276,46 +267,62 @@ array_in(PG_FUNCTION_ARGS)
 		while (scanner_isspace(*p))
 			p++;
 
-		/*
-		 * intuit dimensions from brace structure -- it better match what we
-		 * were given
-		 */
 		if (*p != '{')
 			ereturn(escontext, (Datum) 0,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"", string),
 					 errdetail("Array contents must start with \"{\".")));
-		ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext);
-		if (ndim_braces < 0)
-			PG_RETURN_NULL();
-		if (ndim_braces != ndim)
-			ereturn(escontext, (Datum) 0,
+	}
+
+	/* Parse the value part, in the curly braces: { ... } */
+	if (!ReadArrayStr(&p,
+					  &my_extra->proc, typioparam, typmod,
+					  typdelim,
+					  typlen, typbyval, typalign,
+					  &ndim,
+					  dim,
+					  &nitems,
+					  &values, &nulls,
+					  string,
+					  escontext))
+		PG_RETURN_NULL();
+
+	/* only whitespace is allowed after the closing brace */
+	while (*p)
+	{
+		if (!scanner_isspace(*p++))
+			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"", string),
-					 errdetail("Specified array dimensions do not match array contents.")));
-		for (i = 0; i < ndim; ++i)
-		{
-			if (dim[i] != dim_braces[i])
-				ereturn(escontext, (Datum) 0,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"", string),
-						 errdetail("Specified array dimensions do not match array contents.")));
-		}
+					 errdetail("Junk after closing right brace.")));
 	}
 
 #ifdef ARRAYDEBUG
-	printf("array_in- ndim %d (", ndim);
-	for (i = 0; i < ndim; i++)
 	{
-		printf(" %d", dim[i]);
-	};
-	printf(") for %s\n", string);
+		StringInfoData buf;
+
+		initStringInfo(&buf);
+
+		appendStringInfo(&buf, "array_in- ndim %d (", ndim);
+		for (int i = 0; i < ndim; i++)
+			appendStringInfo(&buf, " %d", dim[i]);
+		appendStringInfo(&buf, ") for %s\n", string);
+		elog(NOTICE, "%s", buf.data);
+		pfree(buf.data);
+	}
 #endif
 
 	/* This checks for overflow of the array dimensions */
-	nitems = ArrayGetNItemsSafe(ndim, dim, escontext);
-	if (nitems < 0)
+
+	/*
+	 * FIXME: Is this still required? I believe all the checks it performs are
+	 * redundant with other checks in ReadArrayDimensions() and ReadArrayStr().
+	 */
+	nitems_according_to_dims = ArrayGetNItemsSafe(ndim, dim, escontext);
+	if (nitems_according_to_dims < 0)
 		PG_RETURN_NULL();
+	if (nitems != nitems_according_to_dims)
+		elog(ERROR, "mismatch nitems, %d vs %d", nitems, nitems_according_to_dims);
 	if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
 		PG_RETURN_NULL();
 
@@ -323,16 +330,30 @@ array_in(PG_FUNCTION_ARGS)
 	if (nitems == 0)
 		PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));
 
-	dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
-	nullsPtr = (bool *) palloc(nitems * sizeof(bool));
-	if (!ReadArrayStr(p, string,
-					  nitems,
-					  &my_extra->proc, typioparam, typmod,
-					  typdelim,
-					  typlen, typbyval, typalign,
-					  dataPtr, nullsPtr,
-					  &hasnulls, &nbytes, escontext))
-		PG_RETURN_NULL();
+	/*
+	 * Check for nulls, compute total data space needed
+	 */
+	hasnulls = false;
+	nbytes = 0;
+	for (int i = 0; i < nitems; i++)
+	{
+		if (nulls[i])
+			hasnulls = true;
+		else
+		{
+			/* let's just make sure data is not toasted */
+			if (typlen == -1)
+				values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+			nbytes = att_addlength_datum(nbytes, typlen, values[i]);
+			nbytes = att_align_nominal(nbytes, typalign);
+			/* check for overflow of total request */
+			if (!AllocSizeIsValid(nbytes))
+				ereturn(escontext, false,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("array size exceeds the maximum allowed (%d)",
+								(int) MaxAllocSize)));
+		}
+	}
 	if (hasnulls)
 	{
 		dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
@@ -343,6 +364,10 @@ array_in(PG_FUNCTION_ARGS)
 		dataoffset = 0;			/* marker for no null bitmap */
 		nbytes += ARR_OVERHEAD_NONULLS(ndim);
 	}
+
+	/*
+	 * Construct the final array datum
+	 */
 	retval = (ArrayType *) palloc0(nbytes);
 	SET_VARSIZE(retval, nbytes);
 	retval->ndim = ndim;
@@ -358,18 +383,16 @@ array_in(PG_FUNCTION_ARGS)
 	memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));
 
 	CopyArrayEls(retval,
-				 dataPtr, nullsPtr, nitems,
+				 values, nulls, nitems,
 				 typlen, typbyval, typalign,
 				 true);
 
-	pfree(dataPtr);
-	pfree(nullsPtr);
-	pfree(string_save);
+	pfree(values);
+	pfree(nulls);
 
 	PG_RETURN_ARRAYTYPE_P(retval);
 }
 
-
 /*
  * ReadArrayDimensions
  *	 parses the array dimensions part from "src" and converts the values
@@ -380,8 +403,7 @@ array_in(PG_FUNCTION_ARGS)
  *
  * *ndim_p, *dim, and *lBound are output variables. They are filled with the
  * number of dimensions (<= MAXDIM), the length of each dimension, and the
- * lower bounds of the slices, respectively. If there is no dimension
- * information, *ndim_p is set to 0.
+ * lower bounds of the slices, respectively.
  *
  * 'origStr' is the original input string, used only in error messages.
  * If *escontext points to an ErrorSaveContext, details of any error are
@@ -493,7 +515,7 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
  *
  * Result:
  *	true for success, false for failure (if escontext is provided).
- *	On success, the parsed integer is returned in *result.
+ *  On success, the parsed integer is returned in *result.
  */
 static bool
 ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext)
@@ -516,350 +538,40 @@ ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontex
 	return true;
 }
 
-/*
- * ArrayCount
- *	 Determines the dimensions for an array string.  This includes
- *	 syntax-checking the array structure decoration (braces and delimiters).
- *
- * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.  This function does not
- * special-case empty arrays: it will return a positive result with some
- * zero axis length(s).
- *
- * If we detect an error, fill *escontext with error details and return -1
- * (unless escontext isn't provided, in which case errors will be thrown).
- */
-static int
-ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
-{
-	int			nest_level = 0,
-				i;
-	int			ndim = 1,
-				nelems[MAXDIM];
-	bool		ndim_frozen = false;
-	bool		in_quotes = false;
-	bool		eoArray = false;
-	const char *ptr;
-	ArrayParseState parse_state = ARRAY_NO_LEVEL;
-
-	/* Initialize dim[] entries to -1 meaning "unknown" */
-	for (i = 0; i < MAXDIM; ++i)
-		dim[i] = -1;
-
-	/* Scan string until we reach closing brace */
-	ptr = str;
-	while (!eoArray)
-	{
-		bool		new_element = false;
-
-		switch (*ptr)
-		{
-			case '\0':
-				/* Signal a premature end of the string */
-				ereturn(escontext, -1,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"", str),
-						 errdetail("Unexpected end of input.")));
-			case '\\':
-
-				/*
-				 * An escape must be after a level start, after an element
-				 * start, or after an element delimiter. In any case we now
-				 * must be past an element start.
-				 */
-				switch (parse_state)
-				{
-					case ARRAY_LEVEL_STARTED:
-					case ARRAY_ELEM_DELIMITED:
-						/* start new unquoted element */
-						parse_state = ARRAY_ELEM_STARTED;
-						new_element = true;
-						break;
-					case ARRAY_ELEM_STARTED:
-					case ARRAY_QUOTED_ELEM_STARTED:
-						/* already in element */
-						break;
-					default:
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"", str),
-								 errdetail("Unexpected \"%c\" character.",
-										   '\\')));
-				}
-				/* skip the escaped character */
-				if (*(ptr + 1))
-					ptr++;
-				else
-					ereturn(escontext, -1,
-							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-							 errmsg("malformed array literal: \"%s\"", str),
-							 errdetail("Unexpected end of input.")));
-				break;
-			case '"':
-
-				/*
-				 * A quote must be after a level start, after a quoted element
-				 * start, or after an element delimiter. In any case we now
-				 * must be past an element start.
-				 */
-				switch (parse_state)
-				{
-					case ARRAY_LEVEL_STARTED:
-					case ARRAY_ELEM_DELIMITED:
-						/* start new quoted element */
-						Assert(!in_quotes);
-						in_quotes = true;
-						parse_state = ARRAY_QUOTED_ELEM_STARTED;
-						new_element = true;
-						break;
-					case ARRAY_QUOTED_ELEM_STARTED:
-						/* already in element, end it */
-						Assert(in_quotes);
-						in_quotes = false;
-						parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
-						break;
-					default:
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"", str),
-								 errdetail("Unexpected array element.")));
-				}
-				break;
-			case '{':
-				if (!in_quotes)
-				{
-					/*
-					 * A left brace can occur if no nesting has occurred yet,
-					 * after a level start, or after a level delimiter.  If we
-					 * see one after an element delimiter, we have something
-					 * like "{1,{2}}", so complain about non-rectangularity.
-					 */
-					switch (parse_state)
-					{
-						case ARRAY_NO_LEVEL:
-						case ARRAY_LEVEL_STARTED:
-						case ARRAY_LEVEL_DELIMITED:
-							/* okay */
-							break;
-						case ARRAY_ELEM_DELIMITED:
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-						default:
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Unexpected \"%c\" character.",
-											   '{')));
-					}
-					parse_state = ARRAY_LEVEL_STARTED;
-					/* Nested sub-arrays count as elements of outer level */
-					if (nest_level > 0)
-						nelems[nest_level - 1]++;
-					/* Initialize element counting in the new level */
-					if (nest_level >= MAXDIM)
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-								 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-										nest_level + 1, MAXDIM)));
-					nelems[nest_level] = 0;
-					nest_level++;
-					if (ndim < nest_level)
-					{
-						/* Can't increase ndim once it's frozen */
-						if (ndim_frozen)
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-						ndim = nest_level;
-					}
-				}
-				break;
-			case '}':
-				if (!in_quotes)
-				{
-					/*
-					 * A right brace can occur after an element start, an
-					 * element completion, a quoted element completion, or a
-					 * level completion.  We also allow it after a level
-					 * start, that is an empty sub-array "{}" --- but that
-					 * freezes the number of dimensions and all such
-					 * sub-arrays must be at the same level, just like
-					 * sub-arrays containing elements.
-					 */
-					switch (parse_state)
-					{
-						case ARRAY_ELEM_STARTED:
-						case ARRAY_QUOTED_ELEM_COMPLETED:
-						case ARRAY_LEVEL_COMPLETED:
-							/* okay */
-							break;
-						case ARRAY_LEVEL_STARTED:
-							/* empty sub-array: OK if at correct nest_level */
-							ndim_frozen = true;
-							if (nest_level != ndim)
-								ereturn(escontext, -1,
-										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-										 errmsg("malformed array literal: \"%s\"", str),
-										 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-							break;
-						default:
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Unexpected \"%c\" character.",
-											   '}')));
-					}
-					parse_state = ARRAY_LEVEL_COMPLETED;
-					/* The parse state check assured we're in a level. */
-					Assert(nest_level > 0);
-					nest_level--;
-
-					if (dim[nest_level] < 0)
-					{
-						/* Save length of first sub-array of this level */
-						dim[nest_level] = nelems[nest_level];
-					}
-					else if (nelems[nest_level] != dim[nest_level])
-					{
-						/* Subsequent sub-arrays must have same length */
-						ereturn(escontext, -1,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"", str),
-								 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-					}
-					/* Done if this is the outermost level's '}' */
-					if (nest_level == 0)
-						eoArray = true;
-				}
-				break;
-			default:
-				if (!in_quotes)
-				{
-					if (*ptr == typdelim)
-					{
-						/*
-						 * Delimiters can occur after an element start, a
-						 * quoted element completion, or a level completion.
-						 */
-						if (parse_state != ARRAY_ELEM_STARTED &&
-							parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-							parse_state != ARRAY_LEVEL_COMPLETED)
-							ereturn(escontext, -1,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"", str),
-									 errdetail("Unexpected \"%c\" character.",
-											   typdelim)));
-						if (parse_state == ARRAY_LEVEL_COMPLETED)
-							parse_state = ARRAY_LEVEL_DELIMITED;
-						else
-							parse_state = ARRAY_ELEM_DELIMITED;
-					}
-					else if (!scanner_isspace(*ptr))
-					{
-						/*
-						 * Other non-space characters must be after a level
-						 * start, after an element start, or after an element
-						 * delimiter. In any case we now must be past an
-						 * element start.
-						 *
-						 * If it's a space character, we can ignore it; it
-						 * might be data or not, but it doesn't change the
-						 * parsing state.
-						 */
-						switch (parse_state)
-						{
-							case ARRAY_LEVEL_STARTED:
-							case ARRAY_ELEM_DELIMITED:
-								/* start new unquoted element */
-								parse_state = ARRAY_ELEM_STARTED;
-								new_element = true;
-								break;
-							case ARRAY_ELEM_STARTED:
-								/* already in element */
-								break;
-							default:
-								ereturn(escontext, -1,
-										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-										 errmsg("malformed array literal: \"%s\"", str),
-										 errdetail("Unexpected array element.")));
-						}
-					}
-				}
-				break;
-		}
-
-		/* To reduce duplication, all new-element cases go through here. */
-		if (new_element)
-		{
-			/*
-			 * Once we have found an element, the number of dimensions can no
-			 * longer increase, and subsequent elements must all be at the
-			 * same nesting depth.
-			 */
-			ndim_frozen = true;
-			if (nest_level != ndim)
-				ereturn(escontext, -1,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"", str),
-						 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
-			/* Count the new element */
-			nelems[nest_level - 1]++;
-		}
-
-		ptr++;
-	}
-
-	/* only whitespace is allowed after the closing brace */
-	while (*ptr)
-	{
-		if (!scanner_isspace(*ptr++))
-			ereturn(escontext, -1,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("malformed array literal: \"%s\"", str),
-					 errdetail("Junk after closing right brace.")));
-	}
-
-	return ndim;
-}
-
 /*
  * ReadArrayStr :
- *	 parses the array string pointed to by "arrayStr" and converts the values
- *	 to internal format.  The array dimensions must have been determined,
- *	 and the case of an empty array must have been handled earlier.
+ *	 parses the array string pointed to by *srcptr and converts the values
+ *	 to internal format.  Determines the array dimensions as it goes.
  *
- * Inputs:
- *	arrayStr: the string to parse.
- *			  CAUTION: the contents of "arrayStr" will be modified!
- *	origStr: the unmodified input string, used only in error messages.
- *	nitems: total number of array elements, as already determined.
+ * On entry, *srcptr points to the string to parse. On successful return,
+ * it is advanced to point past the closing '}'.
+ *
+ * If dimensions were specified explicitly, they are passed in *ndim_p and
+ * dim. This function will check that the array values match the specified
+ * dimensions. If *ndim_p == 0, this function deduces the dimensions from
+ * the structure of the input, and stores them in *ndim_p and the dim array.
+ *
+ * Element type information:
  *	inputproc: type-specific input procedure for element datatype.
  *	typioparam, typmod: auxiliary values to pass to inputproc.
  *	typdelim: the value delimiter (type-specific).
  *	typlen, typbyval, typalign: storage parameters of element datatype.
  *
  * Outputs:
- *	values[]: filled with converted data values.
- *	nulls[]: filled with is-null markers.
- *	*hasnulls: set true iff there are any null elements.
- *	*nbytes: set to total size of data area needed (including alignment
- *		padding but not including array header overhead).
- *	*escontext: if this points to an ErrorSaveContext, details of
- *		any error are reported there.
+ *  *ndim_p, dim: dimensions deduced from the input structure
+ *  *nitems_p: total number of elements
+ *	*values[]: palloc'd array, filled with converted data values.
+ *	*nulls[]: palloc'd array, filled with is-null markers.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
  *
  * Result:
  *	true for success, false for failure (if escontext is provided).
- *
- * Note that values[] and nulls[] are allocated by the caller, and must have
- * nitems elements.
  */
 static bool
-ReadArrayStr(char *arrayStr,
-			 const char *origStr,
-			 int nitems,
+ReadArrayStr(char **srcptr,
 			 FmgrInfo *inputproc,
 			 Oid typioparam,
 			 int32 typmod,
@@ -867,225 +579,345 @@ ReadArrayStr(char *arrayStr,
 			 int typlen,
 			 bool typbyval,
 			 char typalign,
-			 Datum *values,
-			 bool *nulls,
-			 bool *hasnulls,
-			 int32 *nbytes,
+			 int *ndim_p,
+			 int *dim,
+			 int *nitems_p,
+			 Datum **values_p,
+			 bool **nulls_p,
+			 const char *origStr,
 			 Node *escontext)
 {
-	int			i = 0,
-				nest_level = 0;
-	char	   *srcptr;
-	bool		in_quotes = false;
-	bool		eoArray = false;
-	bool		hasnull;
-	int32		totbytes;
+	int			nest_level;
+	int			maxitems;
+	char	   *elembuf;
+	bool		expect_delim;
+	Datum	   *values;
+	bool	   *nulls;
+	int			nitems;
+
+	int			ndim = *ndim_p;
+	bool		dimensions_specified = ndim != 0;
+	bool		ndim_frozen;
+	int			nelems[MAXDIM];
+
+	/* The caller already checked this */
+	Assert(**srcptr == '{');
+
+	if (!dimensions_specified)
+	{
+		/* Initialize dim[] entries to -1 meaning "unknown" */
+		for (int i = 0; i < MAXDIM; ++i)
+			dim[i] = -1;
+	}
+	ndim_frozen = dimensions_specified;
+
+	maxitems = 16;
+	values = (Datum *) palloc(maxitems * sizeof(Datum));
+	nulls = (bool *) palloc(maxitems * sizeof(bool));
 
 	/*
-	 * We have to remove " and \ characters to create a clean item value to
-	 * pass to the datatype input routine.  We overwrite each item value
-	 * in-place within arrayStr to do this.  srcptr is the current scan point,
-	 * and dstptr is where we are copying to.
+	 * Allocate scratch space to hold (string representation of) one element.
 	 *
-	 * We also want to suppress leading and trailing unquoted whitespace. We
-	 * use the leadingspace flag to suppress leading space.  Trailing space is
-	 * tracked by using dstendptr to point to the last significant output
-	 * character.
+	 * We don't know how large the elements are, so be conservative and
+	 * allocate a buffer as large as the input. The string representation of
+	 * any element in the array cannot be larger than the array string itself.
 	 *
-	 * The error checking in this routine is mostly pro-forma, since we expect
-	 * that ArrayCount() already validated the string.  So we don't bother
-	 * with errdetail messages.
+	 * XXX: For a very large array, it'd probably be better to use an
+	 * expandable StringInfo. That would save memory, and avoid the strlen()
+	 * call too.
 	 */
-	srcptr = arrayStr;
-	while (!eoArray)
+	elembuf = palloc(strlen(*srcptr) + 1);
+
+	expect_delim = false;
+	nest_level = 0;
+	nitems = 0;
+	do
 	{
-		bool		itemdone = false;
-		bool		leadingspace = true;
-		bool		hasquoting = false;
-		char	   *itemstart;	/* start of de-escaped text */
-		char	   *dstptr;		/* next output point for de-escaped text */
-		char	   *dstendptr;	/* last significant output char + 1 */
+		ArrayToken	tok;
 
-		/*
-		 * Parse next array element, collecting the de-escaped text into
-		 * itemstart..dstendptr-1.
-		 *
-		 * Notice that we do not set "itemdone" until we see a separator
-		 * (typdelim character) or the array's final right brace.  Since the
-		 * array is already verified to be nonempty and rectangular, there is
-		 * guaranteed to be another element to be processed in the first case,
-		 * while in the second case of course we'll exit the outer loop.
-		 */
-		itemstart = dstptr = dstendptr = srcptr;
+		tok = ReadArrayToken(srcptr, elembuf, typdelim, origStr, escontext);
 
-		while (!itemdone)
+		switch (tok)
 		{
-			switch (*srcptr)
-			{
-				case '\0':
-					/* Signal a premature end of the string */
-					ereturn(escontext, false,
-							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-							 errmsg("malformed array literal: \"%s\"",
-									origStr)));
-					break;
-				case '\\':
-					/* Skip backslash, copy next character as-is. */
-					srcptr++;
-					if (*srcptr == '\0')
+			case ATOK_LEVEL_START:
+				if (expect_delim)
+					ereturn(escontext, false,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"", origStr),
+							 errdetail("Unexpected \"%c\" character.", '{')));
+
+				/* Initialize element counting in the new level */
+				if (nest_level >= MAXDIM)
+					ereturn(escontext, false,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+									nest_level + 1, MAXDIM)));
+
+				nelems[nest_level] = 0;
+				nest_level++;
+				if (ndim < nest_level)
+				{
+					/* Can't increase ndim once it's frozen */
+					if (ndim_frozen)
+						goto dimension_error;
+					ndim = nest_level;
+				}
+				break;
+			case ATOK_LEVEL_END:
+				if (nest_level == 0)
+					ereturn(escontext, false,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"", origStr),
+							 errdetail("Unexpected \"%c\" character.",
+									   '}')));
+				nest_level--;
+				if (nest_level > 0)
+				{
+					/* Nested sub-arrays count as elements of outer level */
+					nelems[nest_level - 1]++;
+					expect_delim = true;
+				}
+				else
+					expect_delim = false;
+
+				if (dim[nest_level] < 0)
+				{
+					/* Save length of first sub-array of this level */
+					dim[nest_level] = nelems[nest_level];
+				}
+				else if (nelems[nest_level] != dim[nest_level])
+				{
+					/* Subsequent sub-arrays must have same length */
+					goto dimension_error;
+				}
+				break;
+
+			case ATOK_DELIM:
+				if (!expect_delim)
+					ereturn(escontext, false,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"", origStr),
+							 errdetail("Unexpected \"%c\" character.",
+									   typdelim)));
+				expect_delim = false;
+				break;
+
+			case ATOK_ELEM:
+			case ATOK_ELEM_NULL:
+				if (expect_delim)
+					ereturn(escontext, false,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"", origStr),
+							 errdetail("Unexpected array element.")));
+
+				/* enlarge the values/nulls arrays if needed */
+				if (nitems == maxitems)
+				{
+#define MaxArraySize ((size_t) (MaxAllocSize / sizeof(Datum)))
+					if (maxitems == MaxArraySize)
 						ereturn(escontext, false,
-								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-								 errmsg("malformed array literal: \"%s\"",
-										origStr)));
-					*dstptr++ = *srcptr++;
-					/* Treat the escaped character as non-whitespace */
-					leadingspace = false;
-					dstendptr = dstptr;
-					hasquoting = true;	/* can't be a NULL marker */
-					break;
-				case '"':
-					in_quotes = !in_quotes;
-					if (in_quotes)
-						leadingspace = false;
-					else
-					{
-						/*
-						 * Advance dstendptr when we exit in_quotes; this
-						 * saves having to do it in all the other in_quotes
-						 * cases.
-						 */
-						dstendptr = dstptr;
-					}
-					hasquoting = true;	/* can't be a NULL marker */
-					srcptr++;
-					break;
-				case '{':
-					if (!in_quotes)
-					{
-						nest_level++;
-						srcptr++;
-					}
-					else
-						*dstptr++ = *srcptr++;
-					break;
-				case '}':
-					if (!in_quotes)
-					{
-						if (nest_level == 0)
-							ereturn(escontext, false,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"",
-											origStr)));
-						nest_level--;
-						if (nest_level == 0)
-							eoArray = itemdone = true;
-						srcptr++;
-					}
-					else
-						*dstptr++ = *srcptr++;
-					break;
-				default:
-					if (in_quotes)
-						*dstptr++ = *srcptr++;
-					else if (*srcptr == typdelim)
-					{
-						itemdone = true;
-						srcptr++;
-					}
-					else if (scanner_isspace(*srcptr))
-					{
-						/*
-						 * If leading space, drop it immediately.  Else, copy
-						 * but don't advance dstendptr.
-						 */
-						if (leadingspace)
-							srcptr++;
-						else
-							*dstptr++ = *srcptr++;
-					}
-					else
-					{
-						*dstptr++ = *srcptr++;
-						leadingspace = false;
-						dstendptr = dstptr;
-					}
-					break;
-			}
-		}
+								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+								 errmsg("array size exceeds the maximum allowed (%d)",
+										(int) MaxArraySize)));
+					maxitems = Min(maxitems * 2, MaxArraySize);
+					values = (Datum *) repalloc(values, maxitems * sizeof(Datum));
+					nulls = (bool *) repalloc(nulls, maxitems * sizeof(bool));
+				}
 
-		/* Terminate de-escaped string */
-		Assert(dstptr < srcptr);
-		*dstendptr = '\0';
+				if (!InputFunctionCallSafe(inputproc, tok == ATOK_ELEM_NULL ? NULL : elembuf,
+										   typioparam, typmod,
+										   escontext,
+										   &values[nitems]))
+					return false;
+				nulls[nitems] = tok == ATOK_ELEM_NULL;
+				nitems++;
 
-		/* Safety check that we don't write past the output arrays */
-		if (i >= nitems)
-			ereturn(escontext, false,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("malformed array literal: \"%s\"",
-							origStr)));
+				/*
+				 * Once we have found an element, the number of dimensions can
+				 * no longer increase, and subsequent elements must all be at
+				 * the same nesting depth.
+				 */
+				ndim_frozen = true;
+				if (nest_level != ndim)
+					goto dimension_error;
+				/* Count the new element */
+				nelems[nest_level - 1]++;
+				expect_delim = true;
+				break;
 
-		/* Convert the de-escaped string into the next output array entries */
-		if (Array_nulls && !hasquoting &&
-			pg_strcasecmp(itemstart, "NULL") == 0)
-		{
-			/* it's a NULL item */
-			if (!InputFunctionCallSafe(inputproc, NULL,
-									   typioparam, typmod,
-									   escontext,
-									   &values[i]))
-				return false;
-			nulls[i] = true;
-		}
-		else
-		{
-			if (!InputFunctionCallSafe(inputproc, itemstart,
-									   typioparam, typmod,
-									   escontext,
-									   &values[i]))
+			case ATOK_ERROR:
 				return false;
-			nulls[i] = false;
 		}
+	} while (nest_level > 0);
 
-		i++;
-	}
+	pfree(elembuf);
 
-	/* Cross-check that we filled all the output array entries */
-	if (i != nitems)
-		ereturn(escontext, false,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("malformed array literal: \"%s\"",
-						origStr)));
-
-	/*
-	 * Check for nulls, compute total data space needed
-	 */
-	hasnull = false;
-	totbytes = 0;
-	for (i = 0; i < nitems; i++)
-	{
-		if (nulls[i])
-			hasnull = true;
-		else
-		{
-			/* let's just make sure data is not toasted */
-			if (typlen == -1)
-				values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
-			totbytes = att_addlength_datum(totbytes, typlen, values[i]);
-			totbytes = att_align_nominal(totbytes, typalign);
-			/* check for overflow of total request */
-			if (!AllocSizeIsValid(totbytes))
-				ereturn(escontext, false,
-						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-						 errmsg("array size exceeds the maximum allowed (%d)",
-								(int) MaxAllocSize)));
-		}
-	}
-	*hasnulls = hasnull;
-	*nbytes = totbytes;
+	*ndim_p = ndim;
+	*values_p = values;
+	*nulls_p = nulls;
+	*nitems_p = nitems;
 	return true;
+
+dimension_error:
+	if (dimensions_specified)
+		errsave(escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed array literal: \"%s\"", origStr),
+				 errdetail("Specified array dimensions do not match array contents.")));
+	else
+		errsave(escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed array literal: \"%s\"", origStr),
+				 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+	return false;
 }
 
 
+/*
+ * ReadArrayToken
+ *	 read one token from an array value string
+ *
+ * Starts scanning from *srcptr. On return, *srcptr is updated past the
+ * token.
+ *
+ * If the token is ATOK_ELEM, the token string is copied to *elembuf. The
+ * caller is responsible for allocating a large enough buffer!
+ */
+static ArrayToken
+ReadArrayToken(char **srcptr, char *elembuf, char typdelim,
+			   const char *origStr, Node *escontext)
+{
+	char	   *p = *srcptr;
+	char	   *dst = elembuf;
+	char	   *dstendptr;
+	bool		has_escapes;
+
+	for (;;)
+	{
+		switch (*p)
+		{
+			case '\0':
+				errsave(escontext,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"",
+								origStr)));
+				*srcptr = p;
+				return ATOK_ERROR;
+			case '{':
+				*srcptr = p + 1;
+				return ATOK_LEVEL_START;
+			case '}':
+				*srcptr = p + 1;
+				return ATOK_LEVEL_END;
+			case '"':
+				/* quoted element */
+				p++;
+				goto quoted_element;
+			default:
+				if (*p == typdelim)
+				{
+					*srcptr = p + 1;
+					return ATOK_DELIM;
+				}
+				if (scanner_isspace(*p))
+				{
+					p++;
+					continue;
+				}
+				goto unquoted_element;
+		}
+	}
+
+quoted_element:
+	for (;;)
+	{
+		switch (*p)
+		{
+			case '\0':
+				errsave(escontext,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"",
+								origStr)));
+				*srcptr = p;
+				return ATOK_ERROR;
+			case '\\':
+				/* Skip backslash, copy next character as-is. */
+				p++;
+				if (*p == '\0')
+				{
+					errsave(escontext,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"",
+									origStr)));
+					*srcptr = p;
+					return ATOK_ERROR;
+				}
+				*dst++ = *p++;
+				break;
+			case '"':
+				*dst++ = '\0';
+				*srcptr = p + 1;
+				return ATOK_ELEM;
+			default:
+				*dst++ = *p++;
+		}
+	}
+
+unquoted_element:
+	dstendptr = dst;
+	has_escapes = false;
+	for (;;)
+	{
+		switch (*p)
+		{
+			case '\0':
+			case '{':
+				errsave(escontext,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"",
+								origStr)));
+				*srcptr = p;
+				return ATOK_ERROR;
+			case '\\':
+				/* Skip backslash, copy next character as-is. */
+				p++;
+				if (*p == '\0')
+				{
+					errsave(escontext,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("malformed array literal: \"%s\"",
+									origStr)));
+					*srcptr = p;
+					return ATOK_ERROR;
+				}
+				*dst++ = *p++;
+				break;
+			case '"':
+				errsave(escontext,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"",
+								origStr)));
+				*srcptr = p;
+				return ATOK_ERROR;
+
+			default:
+				if (*p == typdelim || *p == '}')
+				{
+					*dstendptr = '\0';
+					*srcptr = p;
+					if (Array_nulls && !has_escapes && pg_strcasecmp(elembuf, "NULL") == 0)
+						return ATOK_ELEM_NULL;
+					else
+						return ATOK_ELEM;
+				}
+				*dst++ = *p;
+				if (!scanner_isspace(*p))
+					dstendptr = dst;
+				p++;
+		}
+	}
+}
+
 /*
  * Copy data into an array object from a temporary array of Datums.
  *
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 26d4281d..93e11068 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1480,7 +1480,7 @@ select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "\" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select '{{"1 2" x},{3}}'::text[];
 ERROR:  malformed array literal: "{{"1 2" x},{3}}"
 LINE 1: select '{{"1 2" x},{3}}'::text[];
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index f2af84d7..0610cdbf 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -148,8 +148,8 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
-ArrayParseState
 ArraySubWorkspace
+ArrayToken
 ArrayType
 AsyncQueueControl
 AsyncQueueEntry
-- 
2.34.1

From 520749d09afcf34502a7a81ec06102be85ca03d4 Mon Sep 17 00:00:00 2001
From: pgaddict <[email protected]>
Date: Mon, 11 Sep 2023 12:17:55 +0800
Subject: [PATCH v6 6/7] refactor ReadDimensionInt.

In array_in, if bound info is explicitly mentioned, then check and validate the bound info
of the array.
In array_in, initialize the variables dim and lBound; we need to use the variables' default values in the functions ReadArrayDimensions and ReadArrayStr.
---
 src/backend/utils/adt/arrayfuncs.c   | 51 ++++++++++++----------------
 src/test/regress/expected/arrays.out |  6 ++++
 src/test/regress/sql/arrays.sql      |  1 +
 3 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 19d7af6d..79f4e1b4 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -188,7 +188,6 @@ array_in(PG_FUNCTION_ARGS)
 	Oid			typioparam;
 	char	   *p;
 	int			nitems;
-	int			nitems_according_to_dims;
 	Datum	   *values;
 	bool	   *nulls;
 	bool		hasnulls;
@@ -232,7 +231,12 @@ array_in(PG_FUNCTION_ARGS)
 	typalign = my_extra->typalign;
 	typdelim = my_extra->typdelim;
 	typioparam = my_extra->typioparam;
-
+	/* Initialize dim and lBound; the defaults are used by ReadArrayDimensions and ReadArrayStr */
+	for (int i = 0; i < MAXDIM; i++)
+	{
+		dim[i] = -1;
+		lBound[i] = 1;
+	}
 	/*
 	 * Start processing the input string.
 	 *
@@ -245,14 +249,12 @@ array_in(PG_FUNCTION_ARGS)
 
 	if (ndim == 0)
 	{
-		/* No array dimensions, so intuit dimensions from brace structure */
+		/* No array dimensions, so the first literal character should be an opening curly brace */
 		if (*p != '{')
 			ereturn(escontext, (Datum) 0,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"", string),
 					 errdetail("Array value must start with \"{\" or dimension information.")));
-		for (int i = 0; i < MAXDIM; i++)
-			lBound[i] = 1;
 	}
 	else
 	{
@@ -304,28 +306,17 @@ array_in(PG_FUNCTION_ARGS)
 		initStringInfo(&buf);
 
 		appendStringInfo(&buf, "array_in- ndim %d (", ndim);
-		for (int i = 0; i < ndim; i++)
+		for (int i = 0; i < MAXDIM; i++)
 			appendStringInfo(&buf, " %d", dim[i]);
+		appendStringInfo(&buf, "lBound info");
+		for (int i = 0; i < MAXDIM; i++)
+			appendStringInfo(&buf, " %d", lBound[i]);
 		appendStringInfo(&buf, ") for %s\n", string);
 		elog(NOTICE, "%s", buf.data);
 		pfree(buf.data);
 	}
 #endif
 
-	/* This checks for overflow of the array dimensions */
-
-	/*
-	 * FIXME: Is this still required? I believe all the checks it performs are
-	 * redundant with other checks in ReadArrayDimension() and ReadArrayStr()
-	 */
-	nitems_according_to_dims = ArrayGetNItemsSafe(ndim, dim, escontext);
-	if (nitems_according_to_dims < 0)
-		PG_RETURN_NULL();
-	if (nitems != nitems_according_to_dims)
-		elog(ERROR, "mismatch nitems, %d vs %d", nitems, nitems_according_to_dims);
-	if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
-		PG_RETURN_NULL();
-
 	/* Empty array? */
 	if (nitems == 0)
 		PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));
@@ -485,8 +476,8 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
 							   "]")));
 		p++;
 
-		/* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
-		if (ub == INT_MAX)
+		/* Upper bound of PG_INT32_MAX is disallowed, cf ArrayCheckBounds() */
+		if (ub == PG_INT32_MAX)
 			ereturn(escontext, false,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("array upper bound is too large: %d", ub)));
@@ -521,6 +512,7 @@ static bool
 ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext)
 {
 	char	   *p = *srcptr;
+	long	l;
 
 	/* don't accept leading whitespace */
 	if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
@@ -530,11 +522,16 @@ ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontex
 	}
 
 	errno = 0;
-	*result = strtol(p, srcptr, 10);
-	if (errno == ERANGE)
+	l = strtol(p, srcptr, 10);
+
+	if (errno == ERANGE || l > PG_INT32_MAX || l < PG_INT32_MIN)
+	{
 		ereturn(escontext, false,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("array bound is out of range")));
+	}
+
+	*result = l;
 	return true;
 }
 
@@ -603,12 +600,6 @@ ReadArrayStr(char **srcptr,
 	/* The caller already checked this */
 	Assert(**srcptr == '{');
 
-	if (!dimensions_specified)
-	{
-		/* Initialize dim[] entries to -1 meaning "unknown" */
-		for (int i = 0; i < MAXDIM; ++i)
-			dim[i] = -1;
-	}
 	ndim_frozen = dimensions_specified;
 
 	maxitems = 16;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 93e11068..bea0d00b 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1615,6 +1615,12 @@ select '[1:0]={}'::int[];
  {}
 (1 row)
 
+select '[-2147483648:-2147483647]={1,2}'::int[];
+              int4               
+---------------------------------
+ [-2147483648:-2147483647]={1,2}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index b7e2b180..5eacb351 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -485,6 +485,7 @@ select array[]::text[];
 select '[0:1]={1.1,2.2}'::float8[];
 select '[2147483646:2147483646]={1}'::int[];
 select '[1:0]={}'::int[];
+select '[-2147483648:-2147483647]={1,2}'::int[];
 -- all of the above should be accepted
 
 -- tests for array aggregates
-- 
2.34.1

From 827f6d75a97a065374a368f5ba772f14c53b72b1 Mon Sep 17 00:00:00 2001
From: pgaddict <[email protected]>
Date: Mon, 11 Sep 2023 12:31:39 +0800
Subject: [PATCH v6 7/7] refactor ReadArrayStr.

An unexpected-end-of-input error can happen in any of these cases (quoted_element,
unquoted_element, with backslash, non-backslash).
Consolidate it, using a goto ending_error.
Add more tests.
The expect_delim state is not enough for cases like '{{},}'. You need to track the
current token and the previous token to catch cases where a delimiter is followed by
a closing brace.
---
 src/backend/utils/adt/arrayfuncs.c   | 55 ++++++++++------------------
 src/test/regress/expected/arrays.out | 28 ++++++++++++++
 src/test/regress/sql/arrays.sql      |  7 ++++
 3 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 79f4e1b4..b8f5d59c 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -625,7 +625,7 @@ ReadArrayStr(char **srcptr,
 	do
 	{
 		ArrayToken	tok;
-
+		ArrayToken	prev_tok;
 		tok = ReadArrayToken(srcptr, elembuf, typdelim, origStr, escontext);
 
 		switch (tok)
@@ -655,7 +655,8 @@ ReadArrayStr(char **srcptr,
 				}
 				break;
 			case ATOK_LEVEL_END:
-				if (nest_level == 0)
+				/* must not be preceded by a delimiter, and nest_level must be greater than 0 */
+				if (prev_tok == ATOK_DELIM || nest_level == 0)
 					ereturn(escontext, false,
 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 							 errmsg("malformed array literal: \"%s\"", origStr),
@@ -739,6 +740,7 @@ ReadArrayStr(char **srcptr,
 			case ATOK_ERROR:
 				return false;
 		}
+		prev_tok = tok;
 	} while (nest_level > 0);
 
 	pfree(elembuf);
@@ -788,12 +790,7 @@ ReadArrayToken(char **srcptr, char *elembuf, char typdelim,
 		switch (*p)
 		{
 			case '\0':
-				errsave(escontext,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"",
-								origStr)));
-				*srcptr = p;
-				return ATOK_ERROR;
+				goto ending_error;
 			case '{':
 				*srcptr = p + 1;
 				return ATOK_LEVEL_START;
@@ -825,24 +822,12 @@ quoted_element:
 		switch (*p)
 		{
 			case '\0':
-				errsave(escontext,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"",
-								origStr)));
-				*srcptr = p;
-				return ATOK_ERROR;
+				goto ending_error;
 			case '\\':
 				/* Skip backslash, copy next character as-is. */
 				p++;
 				if (*p == '\0')
-				{
-					errsave(escontext,
-							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-							 errmsg("malformed array literal: \"%s\"",
-									origStr)));
-					*srcptr = p;
-					return ATOK_ERROR;
-				}
+					goto ending_error;
 				*dst++ = *p++;
 				break;
 			case '"':
@@ -863,25 +848,14 @@ unquoted_element:
 		{
 			case '\0':
 			case '{':
-				errsave(escontext,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("malformed array literal: \"%s\"",
-								origStr)));
-				*srcptr = p;
-				return ATOK_ERROR;
+				goto ending_error;
 			case '\\':
 				/* Skip backslash, copy next character as-is. */
 				p++;
 				if (*p == '\0')
-				{
-					errsave(escontext,
-							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-							 errmsg("malformed array literal: \"%s\"",
-									origStr)));
-					*srcptr = p;
-					return ATOK_ERROR;
-				}
+					goto ending_error;
 				*dst++ = *p++;
+				dstendptr = dst;
 				break;
 			case '"':
 				errsave(escontext,
@@ -907,6 +881,15 @@ unquoted_element:
 				p++;
 		}
 	}
+
+ending_error:
+	errsave(escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				errmsg("malformed array literal: \"%s\"",
+					origStr),
+				errdetail("Unexpected end of input.")));
+	*srcptr = p;
+	return ATOK_ERROR;
 }
 
 /*
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index bea0d00b..9bb9924f 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1544,6 +1544,34 @@ ERROR:  cannot determine type of empty array
 LINE 1: select array[];
                ^
 HINT:  Explicitly cast to the desired type, for example ARRAY[]::integer[].
+select '{{1,},{1},}'::text[];
+ERROR:  malformed array literal: "{{1,},{1},}"
+LINE 1: select '{{1,},{1},}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,},{1}}'::text[];
+ERROR:  malformed array literal: "{{1,},{1}}"
+LINE 1: select '{{1,},{1}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,}}'::text[];
+ERROR:  malformed array literal: "{{1,}}"
+LINE 1: select '{{1,}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{1,}'::text[];
+ERROR:  malformed array literal: "{1,}"
+LINE 1: select '{1,}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '[21474836488:21474836489]={1,2}'::int[];
+ERROR:  array bound is out of range
+LINE 1: select '[21474836488:21474836489]={1,2}'::int[];
+               ^
+select '[-2147483649:-2147483648]={1,2}'::int[];
+ERROR:  array bound is out of range
+LINE 1: select '[-2147483649:-2147483648]={1,2}'::int[];
+               ^
 -- none of the above should be accepted
 -- all of the following should be accepted
 select '{}'::text[];
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 5eacb351..6bf18b7d 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -468,6 +468,13 @@ select '[2147483646:2147483647]={1,2}'::int[];
 select '[1:-1]={}'::int[];
 select '[1:0]={1}'::int[];
 select array[];
+select '{{1,},{1},}'::text[];
+select '{{1,},{1}}'::text[];
+select '{{1,}}'::text[];
+select '{1,}'::text[];
+select '[21474836488:21474836489]={1,2}'::int[];
+select '[-2147483649:-2147483648]={1,2}'::int[];
+
 -- none of the above should be accepted
 
 -- all of the following should be accepted
-- 
2.34.1

Re: Cleaning up array_in()

Reply via email to