On 2022-12-22 Th 11:44, Tom Lane wrote:
> Andrew Dunstan <and...@dunslane.net> writes:
>> Yeah, I started there, but it's substantially more complex - unlike cube
>> the jsonpath scanner calls the error routines as well as the parser.
>> Anyway, here's a patch.
> I looked through this and it seems generally OK.  A minor nitpick is
> that we usually write "(Datum) 0" not "(Datum) NULL" for dont-care Datum
> values.  


Fixed in the new version attached.


> A slightly bigger issue is that makeItemLikeRegex still allows
> an error to be thrown from RE_compile_and_cache if a bogus regex is
> presented.  But that could be dealt with later.


I'd rather fix it now while we're paying attention.


>
> (I wonder why this is using RE_compile_and_cache at all, really,
> rather than some other API.  There doesn't seem to be value in
> forcing the regex into the cache at this point.)
>
>                       


I agree. The attached uses pg_regcomp instead. I had to lift a couple of
lines from regexp.c, but not too many.


cheers


andrew


--
Andrew Dunstan
EDB: https://www.enterprisedb.com
diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 91af030095..6c7a5b9854 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -66,16 +66,19 @@
 #include "funcapi.h"
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
+#include "nodes/miscnodes.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/json.h"
 #include "utils/jsonpath.h"
 
 
-static Datum jsonPathFromCstring(char *in, int len);
+static Datum jsonPathFromCstring(char *in, int len, struct Node *escontext);
 static char *jsonPathToCstring(StringInfo out, JsonPath *in,
 							   int estimated_len);
-static int	flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
+static bool	flattenJsonPathParseItem(StringInfo buf, int *result,
+									 struct Node *escontext,
+									 JsonPathParseItem *item,
 									 int nestingLevel, bool insideArraySubscript);
 static void alignStringInfoInt(StringInfo buf);
 static int32 reserveSpaceForItemPointer(StringInfo buf);
@@ -95,7 +98,7 @@ jsonpath_in(PG_FUNCTION_ARGS)
 	char	   *in = PG_GETARG_CSTRING(0);
 	int			len = strlen(in);
 
-	return jsonPathFromCstring(in, len);
+	return jsonPathFromCstring(in, len, fcinfo->context);
 }
 
 /*
@@ -119,7 +122,7 @@ jsonpath_recv(PG_FUNCTION_ARGS)
 	else
 		elog(ERROR, "unsupported jsonpath version number: %d", version);
 
-	return jsonPathFromCstring(str, nbytes);
+	return jsonPathFromCstring(str, nbytes, NULL);
 }
 
 /*
@@ -165,24 +168,29 @@ jsonpath_send(PG_FUNCTION_ARGS)
  * representation of jsonpath.
  */
 static Datum
-jsonPathFromCstring(char *in, int len)
+jsonPathFromCstring(char *in, int len, struct Node *escontext)
 {
-	JsonPathParseResult *jsonpath = parsejsonpath(in, len);
+	JsonPathParseResult *jsonpath = parsejsonpath(in, len, escontext);
 	JsonPath   *res;
 	StringInfoData buf;
 
-	initStringInfo(&buf);
-	enlargeStringInfo(&buf, 4 * len /* estimation */ );
-
-	appendStringInfoSpaces(&buf, JSONPATH_HDRSZ);
+	if (SOFT_ERROR_OCCURRED(escontext))
+		return (Datum) NULL;
 
 	if (!jsonpath)
-		ereport(ERROR,
+		ereturn(escontext, (Datum) NULL,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("invalid input syntax for type %s: \"%s\"", "jsonpath",
 						in)));
 
-	flattenJsonPathParseItem(&buf, jsonpath->expr, 0, false);
+	initStringInfo(&buf);
+	enlargeStringInfo(&buf, 4 * len /* estimation */ );
+
+	appendStringInfoSpaces(&buf, JSONPATH_HDRSZ);
+
+	if (!flattenJsonPathParseItem(&buf, NULL, escontext,
+								  jsonpath->expr, 0, false))
+		return (Datum) NULL;
 
 	res = (JsonPath *) buf.data;
 	SET_VARSIZE(res, buf.len);
@@ -225,9 +233,10 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len)
  * Recursive function converting given jsonpath parse item and all its
  * children into a binary representation.
  */
-static int
-flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
-						 int nestingLevel, bool insideArraySubscript)
+static bool
+flattenJsonPathParseItem(StringInfo buf,  int *result, struct Node *escontext,
+						 JsonPathParseItem *item, int nestingLevel,
+						 bool insideArraySubscript)
 {
 	/* position from beginning of jsonpath data */
 	int32		pos = buf->len - JSONPATH_HDRSZ;
@@ -295,16 +304,22 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				int32		left = reserveSpaceForItemPointer(buf);
 				int32		right = reserveSpaceForItemPointer(buf);
 
-				chld = !item->value.args.left ? pos :
-					flattenJsonPathParseItem(buf, item->value.args.left,
-											 nestingLevel + argNestingLevel,
-											 insideArraySubscript);
+				if (!item->value.args.left)
+					chld = pos;
+				else if (! flattenJsonPathParseItem(buf, &chld, escontext,
+													item->value.args.left,
+													nestingLevel + argNestingLevel,
+													insideArraySubscript))
+					return false;
 				*(int32 *) (buf->data + left) = chld - pos;
 
-				chld = !item->value.args.right ? pos :
-					flattenJsonPathParseItem(buf, item->value.args.right,
-											 nestingLevel + argNestingLevel,
-											 insideArraySubscript);
+				if (!item->value.args.right)
+					chld = pos;
+				else if (! flattenJsonPathParseItem(buf, &chld, escontext,
+													item->value.args.right,
+													nestingLevel + argNestingLevel,
+													insideArraySubscript))
+					return false;
 				*(int32 *) (buf->data + right) = chld - pos;
 			}
 			break;
@@ -323,9 +338,11 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 									   item->value.like_regex.patternlen);
 				appendStringInfoChar(buf, '\0');
 
-				chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr,
-												nestingLevel,
-												insideArraySubscript);
+				if (! flattenJsonPathParseItem(buf, &chld, escontext,
+											   item->value.like_regex.expr,
+											   nestingLevel,
+											   insideArraySubscript))
+					return false;
 				*(int32 *) (buf->data + offs) = chld - pos;
 			}
 			break;
@@ -341,10 +358,13 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 			{
 				int32		arg = reserveSpaceForItemPointer(buf);
 
-				chld = !item->value.arg ? pos :
-					flattenJsonPathParseItem(buf, item->value.arg,
-											 nestingLevel + argNestingLevel,
-											 insideArraySubscript);
+				if (!item->value.arg)
+					chld = pos;
+				else if (! flattenJsonPathParseItem(buf, &chld, escontext,
+													item->value.arg,
+													nestingLevel + argNestingLevel,
+													insideArraySubscript))
+					return false;
 				*(int32 *) (buf->data + arg) = chld - pos;
 			}
 			break;
@@ -357,13 +377,13 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 			break;
 		case jpiCurrent:
 			if (nestingLevel <= 0)
-				ereport(ERROR,
+				ereturn(escontext, false,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("@ is not allowed in root expressions")));
 			break;
 		case jpiLast:
 			if (!insideArraySubscript)
-				ereport(ERROR,
+				ereturn(escontext, false,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("LAST is allowed only in array subscripts")));
 			break;
@@ -383,15 +403,22 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				{
 					int32	   *ppos;
 					int32		topos;
-					int32		frompos =
-					flattenJsonPathParseItem(buf,
-											 item->value.array.elems[i].from,
-											 nestingLevel, true) - pos;
+					int32		frompos;
+
+					if (! flattenJsonPathParseItem(buf, &frompos, escontext,
+												   item->value.array.elems[i].from,
+												   nestingLevel, true))
+						return false;
+					frompos -= pos;
 
 					if (item->value.array.elems[i].to)
-						topos = flattenJsonPathParseItem(buf,
-														 item->value.array.elems[i].to,
-														 nestingLevel, true) - pos;
+					{
+						if (! flattenJsonPathParseItem(buf, &topos, escontext,
+													   item->value.array.elems[i].to,
+													   nestingLevel, true))
+							return false;
+						topos -= pos;
+					}
 					else
 						topos = 0;
 
@@ -424,12 +451,17 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 
 	if (item->next)
 	{
-		chld = flattenJsonPathParseItem(buf, item->next, nestingLevel,
-										insideArraySubscript) - pos;
+		if (! flattenJsonPathParseItem(buf, &chld, escontext,
+									   item->next, nestingLevel,
+									   insideArraySubscript))
+			return false;
+		chld -= pos;
 		*(int32 *) (buf->data + next) = chld;
 	}
 
-	return pos;
+	if (result)
+		*result = pos;
+	return true;
 }
 
 /*
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
index e758616eb8..4335e1c282 100644
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -1721,7 +1721,8 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
 		cxt->regex =
 			cstring_to_text_with_len(jsp->content.like_regex.pattern,
 									 jsp->content.like_regex.patternlen);
-		cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags);
+		(void) jspConvertRegexFlags(jsp->content.like_regex.flags,
+									&(cxt->cflags), NULL);
 	}
 
 	if (RE_compile_and_execute(cxt->regex, str->val.string.val,
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index 2a56629cc3..8c3a0c7623 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -38,9 +38,11 @@ static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
 static JsonPathParseItem *makeItemList(List *list);
 static JsonPathParseItem *makeIndexArray(List *list);
 static JsonPathParseItem *makeAny(int first, int last);
-static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
-											JsonPathString *pattern,
-											JsonPathString *flags);
+static bool makeItemLikeRegex(JsonPathParseItem *expr,
+							  JsonPathString *pattern,
+							  JsonPathString *flags,
+							  JsonPathParseItem ** result,
+							  struct Node *escontext);
 
 /*
  * Bison doesn't allocate anything that needs to live across parser calls,
@@ -57,6 +59,9 @@ static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
 %expect 0
 %name-prefix="jsonpath_yy"
 %parse-param {JsonPathParseResult **result}
+%parse-param {struct Node *escontext}
+%lex-param {JsonPathParseResult **result}
+%lex-param {struct Node *escontext}
 
 %union
 {
@@ -163,9 +168,20 @@ predicate:
 									{ $$ = makeItemUnary(jpiIsUnknown, $2); }
 	| expr STARTS_P WITH_P starts_with_initial
 									{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
-	| expr LIKE_REGEX_P STRING_P	{ $$ = makeItemLikeRegex($1, &$3, NULL); }
+	| expr LIKE_REGEX_P STRING_P
+	{
+		JsonPathParseItem *jppitem;
+		if (! makeItemLikeRegex($1, &$3, NULL, &jppitem, escontext))
+			YYABORT;
+		$$ = jppitem;
+	}
 	| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
-									{ $$ = makeItemLikeRegex($1, &$3, &$5); }
+	{
+		JsonPathParseItem *jppitem;
+		if (! makeItemLikeRegex($1, &$3, &$5, &jppitem, escontext))
+			YYABORT;
+		$$ = jppitem;
+	}
 	;
 
 starts_with_initial:
@@ -472,9 +488,10 @@ makeAny(int first, int last)
 	return v;
 }
 
-static JsonPathParseItem *
+static bool
 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
-				  JsonPathString *flags)
+				  JsonPathString *flags, JsonPathParseItem ** result,
+				  struct Node *escontext)
 {
 	JsonPathParseItem *v = makeItemType(jpiLikeRegex);
 	int			i;
@@ -506,7 +523,7 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
 				v->value.like_regex.flags |= JSP_REGEX_QUOTE;
 				break;
 			default:
-				ereport(ERROR,
+				ereturn(escontext, false,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("invalid input syntax for type %s", "jsonpath"),
 						 errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
@@ -516,21 +533,45 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
 	}
 
 	/* Convert flags to what RE_compile_and_cache needs */
-	cflags = jspConvertRegexFlags(v->value.like_regex.flags);
+	if ( !jspConvertRegexFlags(v->value.like_regex.flags, &cflags, escontext))
+		 return false;
 
 	/* check regex validity */
-	(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
-														 pattern->len),
-								cflags, DEFAULT_COLLATION_OID);
+	{
+		regex_t     re_tmp;
+		pg_wchar   *wpattern;
+		int         wpattern_len;
+		int         re_result;
 
-	return v;
+		wpattern = (pg_wchar *) palloc((pattern->len + 1) * sizeof(pg_wchar));
+		wpattern_len = pg_mb2wchar_with_len(pattern->val,
+											wpattern,
+											pattern->len);
+
+		if ((re_result = pg_regcomp(&re_tmp, wpattern, wpattern_len, cflags,
+									DEFAULT_COLLATION_OID)) != REG_OKAY)
+		{
+			char        errMsg[100];
+
+			/* See regexp.c for explanation */
+			CHECK_FOR_INTERRUPTS();
+			pg_regerror(re_result, &re_tmp, errMsg, sizeof(errMsg));
+			ereturn(escontext, false,
+					(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+					 errmsg("invalid regular expression: %s", errMsg)));
+		}
+	}
+
+	*result = v;
+
+	return true;
 }
 
 /*
  * Convert from XQuery regex flags to those recognized by our regex library.
  */
-int
-jspConvertRegexFlags(uint32 xflags)
+bool
+jspConvertRegexFlags(uint32 xflags, int *result, struct Node *escontext)
 {
 	/* By default, XQuery is very nearly the same as Spencer's AREs */
 	int			cflags = REG_ADVANCED;
@@ -561,7 +602,7 @@ jspConvertRegexFlags(uint32 xflags)
 		 * XQuery-style ignore-whitespace mode.
 		 */
 		if (xflags & JSP_REGEX_WSPACE)
-			ereport(ERROR,
+			ereturn(escontext, false,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
 	}
@@ -574,5 +615,7 @@ jspConvertRegexFlags(uint32 xflags)
 	 */
 	cflags |= REG_NOSUB;
 
-	return cflags;
+	*result = cflags;
+
+	return true;
 }
diff --git a/src/backend/utils/adt/jsonpath_internal.h b/src/backend/utils/adt/jsonpath_internal.h
index edfc6191a0..b27716855e 100644
--- a/src/backend/utils/adt/jsonpath_internal.h
+++ b/src/backend/utils/adt/jsonpath_internal.h
@@ -25,8 +25,14 @@ typedef struct JsonPathString
 #include "utils/jsonpath.h"
 #include "jsonpath_gram.h"
 
-extern int     jsonpath_yylex(YYSTYPE *yylval_param);
-extern int     jsonpath_yyparse(JsonPathParseResult **result);
-extern void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
+#define YY_DECL extern int     jsonpath_yylex(YYSTYPE *yylval_param, \
+							  JsonPathParseResult **result, \
+							  struct Node *escontext)
+YY_DECL;
+extern int     jsonpath_yyparse(JsonPathParseResult **result,
+								struct Node *escontext);
+extern void jsonpath_yyerror(JsonPathParseResult **result,
+							 struct Node *escontext,
+							 const char *message);
 
 #endif							/* JSONPATH_INTERNAL_H */
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 948f379e76..59652c76dc 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -25,6 +25,7 @@
 #include "jsonpath_gram.h"
 
 #include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
 #include "nodes/pg_list.h"
 }
 
@@ -39,8 +40,8 @@ static int	scanbuflen;
 static void addstring(bool init, char *s, int l);
 static void addchar(bool init, char c);
 static enum yytokentype checkKeyword(void);
-static void parseUnicode(char *s, int l);
-static void parseHexChar(char *s);
+static bool parseUnicode(char *s, int l, struct Node *escontext);
+static bool parseHexChar(char *s, struct Node *escontext);
 
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
@@ -147,25 +148,48 @@ hex_fail	\\x{hex_dig}{0,1}
 
 <xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
 
-<xnq,xq,xvq>{unicode}+		{ parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+		{
+								if (!parseUnicode(yytext, yyleng, escontext))
+									yyterminate();
+							}
 
-<xnq,xq,xvq>{hex_char}		{ parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char}		{
+								if (!parseHexChar(yytext, escontext))
+									yyterminate();
+							}
 
-<xnq,xq,xvq>{unicode}*{unicodefail}	{ jsonpath_yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail} {
+								jsonpath_yyerror(NULL, escontext,
+												 "invalid unicode sequence");
+								yyterminate();
+							}
 
-<xnq,xq,xvq>{hex_fail}		{ jsonpath_yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail}		{
+								jsonpath_yyerror(NULL, escontext,
+												 "invalid hex character sequence");
+								yyterminate();
+							}
 
 <xnq,xq,xvq>{unicode}+\\	{
 								/* throw back the \\, and treat as unicode */
 								yyless(yyleng - 1);
-								parseUnicode(yytext, yyleng);
+								if (!parseUnicode(yytext, yyleng, escontext))
+									yyterminate();
 							}
 
 <xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
 
-<xnq,xq,xvq>\\				{ jsonpath_yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\				{
+							  jsonpath_yyerror(NULL, escontext,
+											   "unexpected end after backslash");
+							  yyterminate();
+							}
 
-<xq,xvq><<EOF>>				{ jsonpath_yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>>				{
+							  jsonpath_yyerror(NULL, escontext,
+											   "unexpected end of quoted string");
+							  yyterminate();
+							}
 
 <xq>\"							{
 									yylval->str = scanstring;
@@ -187,8 +211,12 @@ hex_fail	\\x{hex_dig}{0,1}
 
 <xc>\*							{ }
 
-<xc><<EOF>>						{ jsonpath_yyerror(NULL, "unexpected end of comment"); }
-
+<xc><<EOF>>						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"unexpected end of comment");
+									yyterminate();
+								}
 \&\&							{ return AND_P; }
 
 \|\|							{ return OR_P; }
@@ -253,11 +281,30 @@ hex_fail	\\x{hex_dig}{0,1}
 									return INT_P;
 								}
 
-{realfail}						{ jsonpath_yyerror(NULL, "invalid numeric literal"); }
-{integer_junk}					{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{decimal_junk}					{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{real_junk}						{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-
+{realfail}						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"invalid numeric literal");
+									yyterminate();
+								}
+{integer_junk}					{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
+{decimal_junk}					{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
+{real_junk}						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
 \"								{
 									addchar(true, '\0');
 									BEGIN xq;
@@ -281,18 +328,23 @@ hex_fail	\\x{hex_dig}{0,1}
 /* LCOV_EXCL_STOP */
 
 void
-jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
+				 const char *message)
 {
+	/* don't overwrite escontext if it's already been set */
+	if (SOFT_ERROR_OCCURRED(escontext))
+		return;
+
 	if (*yytext == YY_END_OF_BUFFER_CHAR)
 	{
-		ereport(ERROR,
+		errsave(escontext,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 /* translator: %s is typically "syntax error" */
 				 errmsg("%s at end of jsonpath input", _(message))));
 	}
 	else
 	{
-		ereport(ERROR,
+		errsave(escontext,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 /* translator: first %s is typically "syntax error" */
 				 errmsg("%s at or near \"%s\" of jsonpath input",
@@ -463,14 +515,14 @@ addchar(bool init, char c)
 
 /* Interface to jsonpath parser */
 JsonPathParseResult *
-parsejsonpath(const char *str, int len)
+parsejsonpath(const char *str, int len, struct Node *escontext)
 {
 	JsonPathParseResult	*parseresult;
 
 	jsonpath_scanner_init(str, len);
 
-	if (jsonpath_yyparse((void *) &parseresult) != 0)
-		jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */
+	if (jsonpath_yyparse((void *) &parseresult, escontext) != 0)
+		jsonpath_yyerror(NULL, escontext, "bogus input"); /* shouldn't happen */
 
 	jsonpath_scanner_finish();
 
@@ -478,27 +530,36 @@ parsejsonpath(const char *str, int len)
 }
 
 /* Turn hex character into integer */
-static int
-hexval(char c)
+static bool
+hexval(char c, int *result, struct Node *escontext)
 {
 	if (c >= '0' && c <= '9')
-		return c - '0';
+	{
+		*result = c - '0';
+		return true;
+	}
 	if (c >= 'a' && c <= 'f')
-		return c - 'a' + 0xA;
+	{
+		*result = c - 'a' + 0xA;
+		return true;
+	}
 	if (c >= 'A' && c <= 'F')
-		return c - 'A' + 0xA;
-	jsonpath_yyerror(NULL, "invalid hexadecimal digit");
-	return 0; /* not reached */
+	{
+		*result = c - 'A' + 0xA;
+		return true;
+	}
+	jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
+	return false;
 }
 
 /* Add given unicode character to scanstring */
-static void
-addUnicodeChar(int ch)
+static bool
+addUnicodeChar(int ch, struct Node *escontext)
 {
 	if (ch == 0)
 	{
 		/* We can't allow this, since our TEXT type doesn't */
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
 				 errmsg("unsupported Unicode escape sequence"),
 				  errdetail("\\u0000 cannot be converted to text.")));
@@ -507,30 +568,42 @@ addUnicodeChar(int ch)
 	{
 		char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 
-		pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		/*
+		 * If we're trapping the error status, call the noerror form of the
+		 * conversion function. Otherwise call the normal form which provides
+		 * more detailed errors.
+		 */
+
+		if (! escontext  || ! IsA(escontext, ErrorSaveContext))
+			pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
+			ereturn(escontext, false,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("could not convert unicode to server encoding")));
 		addstring(false, cbuf, strlen(cbuf));
 	}
+	return true;
 }
 
 /* Add unicode character, processing any surrogate pairs */
-static void
-addUnicode(int ch, int *hi_surrogate)
+static bool
+addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
 {
 	if (is_utf16_surrogate_first(ch))
 	{
 		if (*hi_surrogate != -1)
-			ereport(ERROR,
+			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode high surrogate must not follow "
 							   "a high surrogate.")));
 		*hi_surrogate = ch;
-		return;
+		return true;
 	}
 	else if (is_utf16_surrogate_second(ch))
 	{
 		if (*hi_surrogate == -1)
-			ereport(ERROR,
+			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode low surrogate must follow a high "
@@ -540,22 +613,22 @@ addUnicode(int ch, int *hi_surrogate)
 	}
 	else if (*hi_surrogate != -1)
 	{
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("invalid input syntax for type %s", "jsonpath"),
 				 errdetail("Unicode low surrogate must follow a high "
 						   "surrogate.")));
 	}
 
-	addUnicodeChar(ch);
+	return addUnicodeChar(ch, escontext);
 }
 
 /*
  * parseUnicode was adopted from json_lex_string() in
  * src/backend/utils/adt/json.c
  */
-static void
-parseUnicode(char *s, int l)
+static bool
+parseUnicode(char *s, int l, struct Node *escontext)
 {
 	int			i = 2;
 	int			hi_surrogate = -1;
@@ -563,41 +636,57 @@ parseUnicode(char *s, int l)
 	for (i = 2; i < l; i += 2)	/* skip '\u' */
 	{
 		int			ch = 0;
-		int			j;
+		int			j, si;
 
 		if (s[i] == '{')	/* parse '\u{XX...}' */
 		{
 			while (s[++i] != '}' && i < l)
-				ch = (ch << 4) | hexval(s[i]);
+			{
+				if (!hexval(s[i], &si, escontext))
+					return false;
+				ch = (ch << 4) | si;
+			}
 			i++;	/* skip '}' */
 		}
 		else		/* parse '\uXXXX' */
 		{
 			for (j = 0; j < 4 && i < l; j++)
-				ch = (ch << 4) | hexval(s[i++]);
+			{
+				if (!hexval(s[i++], &si, escontext))
+					return false;
+				ch = (ch << 4) | si;
+			}
 		}
 
-		addUnicode(ch, &hi_surrogate);
+		if (! addUnicode(ch, &hi_surrogate, escontext))
+			return false;
 	}
 
 	if (hi_surrogate != -1)
 	{
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("invalid input syntax for type %s", "jsonpath"),
 				 errdetail("Unicode low surrogate must follow a high "
 						   "surrogate.")));
 	}
+
+	return true;
 }
 
 /* Parse sequence of hex-encoded characters */
-static void
-parseHexChar(char *s)
+static bool
+parseHexChar(char *s, struct Node *escontext)
 {
-	int			ch = (hexval(s[2]) << 4) |
-					  hexval(s[3]);
+	int s2, s3, ch;
+	if (!hexval(s[2], &s2, escontext))
+		return false;
+	if (!hexval(s[3], &s3, escontext))
+		return false;
 
-	addUnicodeChar(ch);
+	ch = (s2 << 4) | s3;
+
+	return addUnicodeChar(ch, escontext);
 }
 
 /*
diff --git a/src/include/utils/jsonpath.h b/src/include/utils/jsonpath.h
index 13f60cdc09..b5035ceb19 100644
--- a/src/include/utils/jsonpath.h
+++ b/src/include/utils/jsonpath.h
@@ -254,8 +254,11 @@ typedef struct JsonPathParseResult
 	bool		lax;
 } JsonPathParseResult;
 
-extern JsonPathParseResult *parsejsonpath(const char *str, int len);
+extern JsonPathParseResult *parsejsonpath(const char *str, int len,
+										  struct Node *escontext);
+
+extern bool jspConvertRegexFlags(uint32 xflags, int *result,
+								 struct Node *escontext);
 
-extern int	jspConvertRegexFlags(uint32 xflags);
 
 #endif
diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out
index fdaac58367..ca0cdf1ab2 100644
--- a/src/test/regress/expected/jsonpath.out
+++ b/src/test/regress/expected/jsonpath.out
@@ -1032,3 +1032,21 @@ select '1?(2>3)'::jsonpath;
  (1)?(2 > 3)
 (1 row)
 
+-- test non-error-throwing API
+SELECT str as jsonpath,
+       pg_input_is_valid(str,'jsonpath') as ok,
+       pg_input_error_message(str,'jsonpath') as errmsg
+FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text,
+                  '$ ? (@ like_regex "pattern" flag "a")',
+                  '@ + 1',
+                  '00',
+                  '1a']) str;
+                 jsonpath                  | ok |                                errmsg                                 
+-------------------------------------------+----+-----------------------------------------------------------------------
+ $ ? (@ like_regex "pattern" flag "smixq") | t  | 
+ $ ? (@ like_regex "pattern" flag "a")     | f  | invalid input syntax for type jsonpath
+ @ + 1                                     | f  | @ is not allowed in root expressions
+ 00                                        | f  | trailing junk after numeric literal at or near "00" of jsonpath input
+ 1a                                        | f  | trailing junk after numeric literal at or near "1a" of jsonpath input
+(5 rows)
+
diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql
index d491714614..99d21d2af7 100644
--- a/src/test/regress/sql/jsonpath.sql
+++ b/src/test/regress/sql/jsonpath.sql
@@ -187,3 +187,14 @@ select '1..e3'::jsonpath;
 select '(1.).e'::jsonpath;
 select '(1.).e3'::jsonpath;
 select '1?(2>3)'::jsonpath;
+
+-- test non-error-throwing API
+
+SELECT str as jsonpath,
+       pg_input_is_valid(str,'jsonpath') as ok,
+       pg_input_error_message(str,'jsonpath') as errmsg
+FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text,
+                  '$ ? (@ like_regex "pattern" flag "a")',
+                  '@ + 1',
+                  '00',
+                  '1a']) str;

Reply via email to