From e4a7291c41019f8b8dcf49e851c7d7805057ed67 Mon Sep 17 00:00:00 2001
From: Amit Langote <amitlan@postgresql.org>
Date: Fri, 21 Jun 2024 20:55:31 +0900
Subject: [PATCH v2 1/2] SQL/JSON: Disallow incompatible values in ON
 ERROR/EMPTY

Currently, the grammar allows specifying any of the all supported
values in the ON ERROR and ON EMPTY clause for each SQL/JSON function
that supports those clauses.  But the semantics of each of those
function allows only a subset of the values for any given function.
So, check during parse analysis that the provided value is valid for
the given function and throw a syntax error if not.

Reported-by: Jian He <jian.universality@gmail.com>
Discussion: https://postgr.es/m/CACJufxFgWGqpESSYzyJ6tSurr3vFYBSNEmCfkGyB_dMdptFnZQ%40mail.gmail.com
---
 doc/src/sgml/func.sgml                        |   6 +-
 src/backend/parser/parse_expr.c               | 126 ++++++++++++++++--
 src/backend/parser/parse_jsontable.c          |   2 +-
 .../regress/expected/sqljson_jsontable.out    |  25 +++-
 .../regress/expected/sqljson_queryfuncs.out   |  16 +++
 src/test/regress/sql/sqljson_jsontable.sql    |   6 +
 src/test/regress/sql/sqljson_queryfuncs.sql   |   5 +
 7 files changed, 172 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2609269610..bc54fc2a40 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18929,7 +18929,7 @@ DETAIL:  Missing "]" after array dimensions.
 JSON_TABLE (
     <replaceable>context_item</replaceable>, <replaceable>path_expression</replaceable> <optional> AS <replaceable>json_path_name</replaceable> </optional> <optional> PASSING { <replaceable>value</replaceable> AS <replaceable>varname</replaceable> } <optional>, ...</optional> </optional>
     COLUMNS ( <replaceable class="parameter">json_table_column</replaceable> <optional>, ...</optional> )
-    <optional> { <literal>ERROR</literal> | <literal>EMPTY</literal> } <literal>ON ERROR</literal> </optional>
+    <optional> { <literal>ERROR</literal> | <literal>EMPTY</literal> <optional>ARRAY</optional>} <literal>ON ERROR</literal> </optional>
 )
 
 <phrase>
@@ -18941,8 +18941,8 @@ where <replaceable class="parameter">json_table_column</replaceable> is:
         <optional> PATH <replaceable>path_expression</replaceable> </optional>
         <optional> { WITHOUT | WITH { CONDITIONAL | <optional>UNCONDITIONAL</optional> } } <optional> ARRAY </optional> WRAPPER </optional>
         <optional> { KEEP | OMIT } QUOTES <optional> ON SCALAR STRING </optional> </optional>
-        <optional> { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT <replaceable>expression</replaceable> } ON EMPTY </optional>
-        <optional> { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT <replaceable>expression</replaceable> } ON ERROR </optional>
+        <optional> { ERROR | NULL | EMPTY { <optional>ARRAY</optional> | OBJECT } | DEFAULT <replaceable>expression</replaceable> } ON EMPTY </optional>
+        <optional> { ERROR | NULL | EMPTY { <optional>ARRAY</optional> | OBJECT } | DEFAULT <replaceable>expression</replaceable> } ON ERROR </optional>
   | <replaceable>name</replaceable> <replaceable>type</replaceable> EXISTS <optional> PATH <replaceable>path_expression</replaceable> </optional>
         <optional> { ERROR | TRUE | FALSE | UNKNOWN } ON ERROR </optional>
   | NESTED <optional> PATH </optional> <replaceable>path_expression</replaceable> <optional> AS <replaceable>json_path_name</replaceable> </optional> COLUMNS ( <replaceable>json_table_column</replaceable> <optional>, ...</optional> )
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 699d0be751..5b576be172 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -4321,14 +4321,124 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 	}
 
 	/* OMIT QUOTES is meaningless when strings are wrapped. */
-	if (func->op == JSON_QUERY_OP &&
-		func->quotes == JS_QUOTES_OMIT &&
-		(func->wrapper == JSW_CONDITIONAL ||
-		 func->wrapper == JSW_UNCONDITIONAL))
-		ereport(ERROR,
-				errcode(ERRCODE_SYNTAX_ERROR),
-				errmsg("SQL/JSON QUOTES behavior must not be specified when WITH WRAPPER is used"),
-				parser_errposition(pstate, func->location));
+	if (func->op == JSON_QUERY_OP)
+	{
+		if (func->quotes == JS_QUOTES_OMIT &&
+			(func->wrapper == JSW_CONDITIONAL ||
+			 func->wrapper == JSW_UNCONDITIONAL))
+			ereport(ERROR,
+					errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("SQL/JSON QUOTES behavior must not be specified when WITH WRAPPER is used"),
+					parser_errposition(pstate, func->location));
+		if (func->on_empty != NULL &&
+			func->on_empty->btype != JSON_BEHAVIOR_ERROR &&
+			func->on_empty->btype != JSON_BEHAVIOR_NULL &&
+			func->on_empty->btype != JSON_BEHAVIOR_EMPTY &&
+			func->on_empty->btype != JSON_BEHAVIOR_EMPTY_ARRAY &&
+			func->on_empty->btype != JSON_BEHAVIOR_EMPTY_OBJECT &&
+			func->on_empty->btype != JSON_BEHAVIOR_DEFAULT)
+		{
+			if (func->column_name == NULL)
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON EMPTY behavior"),
+						errdetail("Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON EMPTY for JSON_QUERY()."),
+						parser_errposition(pstate, func->on_empty->location));
+			else
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON EMPTY behavior for column \"%s\"",
+								func->column_name),
+						errdetail("Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON EMPTY for formatted columns."),
+						parser_errposition(pstate, func->on_empty->location));
+		}
+		if (func->on_error != NULL &&
+			func->on_error->btype != JSON_BEHAVIOR_ERROR &&
+			func->on_error->btype != JSON_BEHAVIOR_NULL &&
+			func->on_error->btype != JSON_BEHAVIOR_EMPTY &&
+			func->on_error->btype != JSON_BEHAVIOR_EMPTY_ARRAY &&
+			func->on_error->btype != JSON_BEHAVIOR_EMPTY_OBJECT &&
+			func->on_error->btype != JSON_BEHAVIOR_DEFAULT)
+		{
+			if (func->column_name == NULL)
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON ERROR behavior"),
+						errdetail("Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON ERROR for JSON_QUERY()."),
+						parser_errposition(pstate, func->on_error->location));
+			else
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON ERROR behavior for column \"%s\"",
+								func->column_name),
+						errdetail("Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON ERROR for formatted columns."),
+						parser_errposition(pstate, func->on_error->location));
+		}
+	}
+
+	/* Check that ON ERROR/EMPTY behavior values are valid for the function. */
+	if (func->op == JSON_EXISTS_OP &&
+		func->on_error != NULL &&
+		func->on_error->btype != JSON_BEHAVIOR_ERROR &&
+		func->on_error->btype != JSON_BEHAVIOR_TRUE &&
+		func->on_error->btype != JSON_BEHAVIOR_FALSE &&
+		func->on_error->btype != JSON_BEHAVIOR_UNKNOWN)
+	{
+		if (func->column_name == NULL)
+			ereport(ERROR,
+					errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("invalid ON ERROR behavior"),
+					errdetail("Only ERROR, TRUE, FALSE, or UNKNOWN is allowed in ON ERROR for JSON_EXISTS()."),
+					parser_errposition(pstate, func->on_error->location));
+		else
+			ereport(ERROR,
+					errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("invalid ON ERROR behavior for column \"%s\"",
+							func->column_name),
+					errdetail("Only ERROR, TRUE, FALSE, or UNKNOWN is allowed in ON ERROR for EXISTS columns."),
+					parser_errposition(pstate, func->on_error->location));
+	}
+	if (func->op == JSON_VALUE_OP)
+	{
+		if (func->on_empty != NULL &&
+			func->on_empty->btype != JSON_BEHAVIOR_ERROR &&
+			func->on_empty->btype != JSON_BEHAVIOR_NULL &&
+			func->on_empty->btype != JSON_BEHAVIOR_DEFAULT)
+		{
+			if (func->column_name == NULL)
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON EMPTY behavior"),
+						errdetail("Only ERROR, NULL, or DEFAULT expression is allowed in ON EMPTY for JSON_VALUE()."),
+						parser_errposition(pstate, func->on_empty->location));
+			else
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON EMPTY behavior for column \"%s\"",
+								func->column_name),
+						errdetail("Only ERROR, NULL, or DEFAULT expression is allowed in ON EMPTY for scalar columns."),
+						parser_errposition(pstate, func->on_empty->location));
+		}
+		if (func->on_error != NULL &&
+			func->on_error->btype != JSON_BEHAVIOR_ERROR &&
+			 func->on_error->btype != JSON_BEHAVIOR_NULL &&
+			 func->on_error->btype != JSON_BEHAVIOR_DEFAULT)
+		{
+			if (func->column_name == NULL)
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON ERROR behavior"),
+						errdetail("Only ERROR, NULL, or DEFAULT expression is allowed in ON ERROR for JSON_VALUE()."),
+						parser_errposition(pstate, func->on_error->location));
+			else
+				ereport(ERROR,
+						errcode(ERRCODE_SYNTAX_ERROR),
+						errmsg("invalid ON ERROR behavior for column \"%s\"",
+								func->column_name),
+						errdetail("Only ERROR, NULL, or DEFAULT expression is allowed in ON ERROR for scalar columns."),
+						parser_errposition(pstate, func->on_error->location));
+		}
+	}
 
 	jsexpr = makeNode(JsonExpr);
 	jsexpr->location = func->location;
diff --git a/src/backend/parser/parse_jsontable.c b/src/backend/parser/parse_jsontable.c
index b2519c2f32..8a72e498e8 100644
--- a/src/backend/parser/parse_jsontable.c
+++ b/src/backend/parser/parse_jsontable.c
@@ -92,7 +92,7 @@ transformJsonTable(ParseState *pstate, JsonTable *jt)
 		ereport(ERROR,
 				errcode(ERRCODE_SYNTAX_ERROR),
 				errmsg("invalid ON ERROR behavior"),
-				errdetail("Only EMPTY or ERROR is allowed in the top-level ON ERROR clause."),
+				errdetail("Only EMPTY [ ARRAY ] or ERROR is allowed in the top-level ON ERROR clause."),
 				parser_errposition(pstate, jt->on_error->location));
 
 	cxt.pathNameId = 0;
diff --git a/src/test/regress/expected/sqljson_jsontable.out b/src/test/regress/expected/sqljson_jsontable.out
index cf43442891..b1da088785 100644
--- a/src/test/regress/expected/sqljson_jsontable.out
+++ b/src/test/regress/expected/sqljson_jsontable.out
@@ -9,12 +9,12 @@ SELECT * FROM JSON_TABLE('[]', 'strict $.a' COLUMNS (js2 int PATH '$') DEFAULT 1
 ERROR:  invalid ON ERROR behavior
 LINE 1: ...BLE('[]', 'strict $.a' COLUMNS (js2 int PATH '$') DEFAULT 1 ...
                                                              ^
-DETAIL:  Only EMPTY or ERROR is allowed in the top-level ON ERROR clause.
+DETAIL:  Only EMPTY [ ARRAY ] or ERROR is allowed in the top-level ON ERROR clause.
 SELECT * FROM JSON_TABLE('[]', 'strict $.a' COLUMNS (js2 int PATH '$') NULL ON ERROR);
 ERROR:  invalid ON ERROR behavior
 LINE 1: ...BLE('[]', 'strict $.a' COLUMNS (js2 int PATH '$') NULL ON ER...
                                                              ^
-DETAIL:  Only EMPTY or ERROR is allowed in the top-level ON ERROR clause.
+DETAIL:  Only EMPTY [ ARRAY ] or ERROR is allowed in the top-level ON ERROR clause.
 SELECT * FROM JSON_TABLE('[]', 'strict $.a' COLUMNS (js2 int PATH '$') EMPTY ON ERROR);
  js2 
 -----
@@ -1057,3 +1057,24 @@ CREATE OR REPLACE VIEW public.jsonb_table_view7 AS
         ) sub
 DROP VIEW jsonb_table_view7;
 DROP TABLE s;
+-- Test ON ERROR / EMPTY value validity for the function and column types; all fail
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int) NULL ON ERROR);	-- fail
+ERROR:  invalid ON ERROR behavior
+LINE 1: ... * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int) NULL ON ER...
+                                                             ^
+DETAIL:  Only EMPTY [ ARRAY ] or ERROR is allowed in the top-level ON ERROR clause.
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int true on empty));
+ERROR:  invalid ON EMPTY behavior for column "a"
+LINE 1: ...T * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int true on em...
+                                                             ^
+DETAIL:  Only ERROR, NULL, or DEFAULT expression is allowed in ON EMPTY for scalar columns.
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int omit quotes true on error));
+ERROR:  invalid ON ERROR behavior for column "a"
+LINE 1: ...N_TABLE(jsonb '1', '$' COLUMNS (a int omit quotes true on er...
+                                                             ^
+DETAIL:  Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON ERROR for formatted columns.
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int exists empty object on error));
+ERROR:  invalid ON ERROR behavior for column "a"
+LINE 1: ...M JSON_TABLE(jsonb '1', '$' COLUMNS (a int exists empty obje...
+                                                             ^
+DETAIL:  Only ERROR, TRUE, FALSE, or UNKNOWN is allowed in ON ERROR for EXISTS columns.
diff --git a/src/test/regress/expected/sqljson_queryfuncs.out b/src/test/regress/expected/sqljson_queryfuncs.out
index ea136eefa7..a8a2ea7204 100644
--- a/src/test/regress/expected/sqljson_queryfuncs.out
+++ b/src/test/regress/expected/sqljson_queryfuncs.out
@@ -1389,3 +1389,19 @@ SELECT JSON_VALUE(jsonb '123', '$' RETURNING queryfuncs_char2_chk DEFAULT 1 ON E
 (1 row)
 
 DROP DOMAIN queryfuncs_char2, queryfuncs_char2_chk;
+-- Test ON ERROR / EMPTY value validity for the function; all fail.
+SELECT JSON_EXISTS(jsonb '1', '$' DEFAULT 1 ON ERROR);
+ERROR:  invalid ON ERROR behavior
+LINE 1: SELECT JSON_EXISTS(jsonb '1', '$' DEFAULT 1 ON ERROR);
+                                          ^
+DETAIL:  Only ERROR, TRUE, FALSE, or UNKNOWN is allowed in ON ERROR for JSON_EXISTS().
+SELECT JSON_VALUE(jsonb '1', '$' EMPTY ON ERROR);
+ERROR:  invalid ON ERROR behavior
+LINE 1: SELECT JSON_VALUE(jsonb '1', '$' EMPTY ON ERROR);
+                                         ^
+DETAIL:  Only ERROR, NULL, or DEFAULT expression is allowed in ON ERROR for JSON_VALUE().
+SELECT JSON_QUERY(jsonb '1', '$' TRUE ON ERROR);
+ERROR:  invalid ON ERROR behavior
+LINE 1: SELECT JSON_QUERY(jsonb '1', '$' TRUE ON ERROR);
+                                         ^
+DETAIL:  Only ERROR, NULL, EMPTY [ ARRAY ], EMPTY OBJECT, or DEFAULT expression is allowed in ON ERROR for JSON_QUERY().
diff --git a/src/test/regress/sql/sqljson_jsontable.sql b/src/test/regress/sql/sqljson_jsontable.sql
index 70bff37a6d..4562f08aed 100644
--- a/src/test/regress/sql/sqljson_jsontable.sql
+++ b/src/test/regress/sql/sqljson_jsontable.sql
@@ -518,3 +518,9 @@ SELECT sub.* FROM s,
 \sv jsonb_table_view7
 DROP VIEW jsonb_table_view7;
 DROP TABLE s;
+
+-- Test ON ERROR / EMPTY value validity for the function and column types; all fail
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int) NULL ON ERROR);	-- fail
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int true on empty));
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int omit quotes true on error));
+SELECT * FROM JSON_TABLE(jsonb '1', '$' COLUMNS (a int exists empty object on error));
diff --git a/src/test/regress/sql/sqljson_queryfuncs.sql b/src/test/regress/sql/sqljson_queryfuncs.sql
index 0d6482f384..de2c043af0 100644
--- a/src/test/regress/sql/sqljson_queryfuncs.sql
+++ b/src/test/regress/sql/sqljson_queryfuncs.sql
@@ -474,3 +474,8 @@ SELECT JSON_VALUE(jsonb '123', '$' RETURNING queryfuncs_char2 DEFAULT 1 ON ERROR
 SELECT JSON_VALUE(jsonb '123', '$' RETURNING queryfuncs_char2_chk ERROR ON ERROR);
 SELECT JSON_VALUE(jsonb '123', '$' RETURNING queryfuncs_char2_chk DEFAULT 1 ON ERROR);
 DROP DOMAIN queryfuncs_char2, queryfuncs_char2_chk;
+
+-- Test ON ERROR / EMPTY value validity for the function; all fail.
+SELECT JSON_EXISTS(jsonb '1', '$' DEFAULT 1 ON ERROR);
+SELECT JSON_VALUE(jsonb '1', '$' EMPTY ON ERROR);
+SELECT JSON_QUERY(jsonb '1', '$' TRUE ON ERROR);
-- 
2.43.0

