On 2022-11-21 Mo 00:26, Tom Lane wrote:
> Corey Huinker <corey.huin...@gmail.com> writes:
>> On Tue, Nov 15, 2022 at 11:36 AM Andrew Dunstan <and...@dunslane.net> wrote:
>>> Nikita,
>>> just checking in, are you making progress on this? I think we really
>>> need to get this reviewed and committed ASAP if we are to have a chance
>>> to get the SQL/JSON stuff reworked to use it in time for release 16.
>> I'm making an attempt at this or something very similar to it. I don't yet
>> have a patch ready.
> Cool.  We can't delay too much longer on this if we want to have
> a credible feature in v16.  Although I want a minimal initial
> patch, there will still be a ton of incremental work to do after
> the core capability is reviewed and committed, so there's no
> time to lose.
>
>                       


OK, here's a set of minimal patches based on Nikita's earlier work and
also some work by my colleague Amul Sul. It tries to follow Tom's
original outline at [1], and do as little else as possible.

Patch 1 introduces the IOCallContext node. The caller should set the
no_error_throw flag and clear the error_found flag, which will be set by
a conforming IO function if an error is found. It also includes a string
field for an error message. I haven't used that, it's more there to
stimulate discussion. Robert suggested to me that maybe it should be an
ErrorData, but I'm not sure how we would use it.

Patch 2 introduces InputFunctionCallContext(), which is similar to
InputFunctionCall() but with an extra context parameter. Note that it's
ok for an input function to return a NULL to this function if an error
is found.

Patches 3 and 4 modify the bool_in() and int4in() functions respectively
to handle an IOContextCall appropriately if provided one in their
fcinfo.context.

Patch 5 introduces COPY FROM ... NULL_ON_ERROR which, in addition to
being useful in itself, provides a test of the previous patches.

I hope we can get a fairly quick agreement so that work can being on
adjusting at least those things needed for the SQL/JSON patches ASAP.
Our goal should be to adjust all the core input functions, but that's
not quite so urgent, and can be completed in parallel with the SQL/JSON
work.


cheers


andrew

[1] https://www.postgresql.org/message-id/13351.1661965592%40sss.pgh.pa.us




--
Andrew Dunstan
EDB: https://www.enterprisedb.com
From e84cf9461fe3898c0326e7c369c65fc4ef1761c6 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <and...@dunslane.net>
Date: Wed, 30 Nov 2022 10:44:27 -0500
Subject: [PATCH 1/5] Add IOCallContext node

---
 src/include/nodes/primnodes.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 74f228d959..3e2ecd11b3 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1692,4 +1692,21 @@ typedef struct OnConflictExpr
 	List	   *exclRelTlist;	/* tlist of the EXCLUDED pseudo relation */
 } OnConflictExpr;
 
+/*----------
+ * IOCallContext - passed to an input function as FunctionCallInfo.context
+ *
+ * if the field no_error_throw is set then a conforming IO function will
+ * set the field error_found on error rather than calling ereport(ERROR ...)
+ *
+ */
+typedef struct IOCallContext
+{
+	pg_node_attr(nodetag_only) /* don't need copy etc. support functions */
+
+	NodeTag		type;
+	bool        no_error_throw; /* set by caller to suppress errors */
+	bool        error_found;    /* cleared by caller, set by IO function */
+	char       *error_message;
+} IOCallContext;
+
 #endif							/* PRIMNODES_H */
-- 
2.34.1

From b6b277f5dd3837ab9ac2df819f4e67f39d790d1f Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <and...@dunslane.net>
Date: Wed, 30 Nov 2022 10:46:03 -0500
Subject: [PATCH 2/5] Add InputFunctionCallContext()

---
 src/backend/utils/fmgr/fmgr.c | 50 +++++++++++++++++++++++++++++++++++
 src/include/fmgr.h            |  3 +++
 2 files changed, 53 insertions(+)

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 3c210297aa..bd91ae1dac 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -24,6 +24,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/primnodes.h"
 #include "pgstat.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -1548,6 +1549,55 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
 	return result;
 }
 
+Datum
+InputFunctionCallContext(FmgrInfo *flinfo, char *str,
+						 Oid typioparam, int32 typmod,
+						 void * ctxt)
+{
+	LOCAL_FCINFO(fcinfo, 3);
+	Datum		result;
+
+	if (str == NULL && flinfo->fn_strict)
+		return (Datum) 0;		/* just return null result */
+
+	InitFunctionCallInfoData(*fcinfo, flinfo, 3, InvalidOid, NULL, NULL);
+
+	fcinfo->args[0].value = CStringGetDatum(str);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+	fcinfo->args[1].isnull = false;
+	fcinfo->args[2].value = Int32GetDatum(typmod);
+	fcinfo->args[2].isnull = false;
+
+	fcinfo->context = ctxt;
+
+	result = FunctionCallInvoke(fcinfo);
+
+	/* NULL is OK if we found an error and expect the caller to handle it */
+	if (ctxt && IsA(ctxt, IOCallContext))
+	{
+		IOCallContext *ioc = (IOCallContext *) ctxt;
+		if (ioc->no_error_throw && ioc->error_found)
+			return result;
+	}
+
+	/* Otherwise should get null result if and only if str is NULL */
+	if (str == NULL)
+	{
+		if (!fcinfo->isnull)
+			elog(ERROR, "input function %u returned non-NULL",
+				 flinfo->fn_oid);
+	}
+	else
+	{
+		if (fcinfo->isnull)
+			elog(ERROR, "input function %u returned NULL",
+				 flinfo->fn_oid);
+	}
+
+	return result;
+}
+
 /*
  * Call a previously-looked-up datatype output function.
  *
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index 380a82b9de..4d7cb34441 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -702,6 +702,9 @@ extern Datum InputFunctionCall(FmgrInfo *flinfo, char *str,
 							   Oid typioparam, int32 typmod);
 extern Datum OidInputFunctionCall(Oid functionId, char *str,
 								  Oid typioparam, int32 typmod);
+extern Datum InputFunctionCallContext(FmgrInfo *flinfo, char *str,
+									  Oid typioparam, int32 typmod,
+									  void * ctxt);
 extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
 extern char *OidOutputFunctionCall(Oid functionId, Datum val);
 extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf,
-- 
2.34.1

From ff2164dd77fde04b961f4bbbc9a1e4d9830733ff Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <and...@dunslane.net>
Date: Wed, 30 Nov 2022 11:32:30 -0500
Subject: [PATCH 3/5] Add safe error handling to bool_in

---
 src/backend/utils/adt/bool.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c
index cd7335287f..d4ca519c9b 100644
--- a/src/backend/utils/adt/bool.c
+++ b/src/backend/utils/adt/bool.c
@@ -18,6 +18,8 @@
 #include <ctype.h>
 
 #include "libpq/pqformat.h"
+#include "nodes/nodes.h"
+#include "nodes/primnodes.h"
 #include "utils/builtins.h"
 
 /*
@@ -148,6 +150,16 @@ boolin(PG_FUNCTION_ARGS)
 	if (parse_bool_with_len(str, len, &result))
 		PG_RETURN_BOOL(result);
 
+	if (fcinfo->context && IsA(fcinfo->context, IOCallContext))
+	{
+		IOCallContext *ioc = (IOCallContext *) fcinfo->context;
+		if (ioc->no_error_throw)
+		{
+			ioc->error_found = true;
+			PG_RETURN_BOOL(false);
+		}
+	}
+
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 			 errmsg("invalid input syntax for type %s: \"%s\"",
-- 
2.34.1

From 77abcdc861c24e1c41e8ef82ea89fa55d0149934 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <and...@dunslane.net>
Date: Wed, 30 Nov 2022 14:19:08 -0500
Subject: [PATCH 4/5] Add error safety to int4in

---
 src/backend/utils/adt/int.c      |  2 +-
 src/backend/utils/adt/numutils.c | 28 +++++++++++++++++++++++++++-
 src/include/utils/builtins.h     |  1 +
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 42ddae99ef..719a17498d 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -291,7 +291,7 @@ int4in(PG_FUNCTION_ARGS)
 {
 	char	   *num = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_INT32(pg_strtoint32(num));
+	PG_RETURN_INT32(pg_strtoint32_context(num, fcinfo->context));
 }
 
 /*
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 834ec0b588..f6d9a75cd8 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -19,6 +19,7 @@
 #include <ctype.h>
 
 #include "common/int.h"
+#include "nodes/primnodes.h"
 #include "utils/builtins.h"
 #include "port/pg_bitutils.h"
 
@@ -171,8 +172,13 @@ invalid_syntax:
  * representation of the most negative number, which can't be represented as a
  * positive number.
  */
+int32 pg_strtoint32(const char *s)
+{
+	return pg_strtoint32_context(s, NULL);
+}
+
 int32
-pg_strtoint32(const char *s)
+pg_strtoint32_context(const char *s, void * context)
 {
 	const char *ptr = s;
 	int32		tmp = 0;
@@ -223,12 +229,32 @@ pg_strtoint32(const char *s)
 	return tmp;
 
 out_of_range:
+	if (context && IsA(context, IOCallContext))
+	{
+		IOCallContext *ioc = (IOCallContext *) context;
+		if (ioc->no_error_throw)
+		{
+			ioc->error_found = true;
+			return 0;
+		}
+	}
+
 	ereport(ERROR,
 			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 			 errmsg("value \"%s\" is out of range for type %s",
 					s, "integer")));
 
 invalid_syntax:
+	if (context && IsA(context, IOCallContext))
+	{
+		IOCallContext *ioc = (IOCallContext *) context;
+		if (ioc->no_error_throw)
+		{
+			ioc->error_found = true;
+			return 0;
+		}
+	}
+
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 			 errmsg("invalid input syntax for type %s: \"%s\"",
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 81631f1645..eb24716a77 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -45,6 +45,7 @@ extern int	namestrcmp(Name name, const char *str);
 /* numutils.c */
 extern int16 pg_strtoint16(const char *s);
 extern int32 pg_strtoint32(const char *s);
+extern int32 pg_strtoint32_context(const char *s, void *);
 extern int64 pg_strtoint64(const char *s);
 extern int	pg_itoa(int16 i, char *a);
 extern int	pg_ultoa_n(uint32 value, char *a);
-- 
2.34.1

From a8b0161d6ddbdf7fc636ac4c6fc189e38b3ec08d Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <and...@dunslane.net>
Date: Wed, 30 Nov 2022 13:44:55 -0500
Subject: [PATCH 5/5] Add COPY FROM ... NULL ON ERROR ...

---
 doc/src/sgml/ref/copy.sgml               | 13 ++++++++++
 src/backend/commands/copy.c              | 20 +++++++++++++++
 src/backend/commands/copyfrom.c          | 28 +++++++++++++++++++++
 src/backend/commands/copyfromparse.c     | 32 +++++++++++++++++++-----
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  3 +++
 src/test/regress/expected/copy.out       | 13 ++++++++++
 src/test/regress/sql/copy.sql            | 12 +++++++++
 8 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index c25b52d0cb..f660ec6599 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -42,6 +42,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
     FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
     FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
+    NULL_ON_ERROR ( <replaceable class="parameter">column_name</replaceable> [, ...] )
     ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
 </synopsis>
  </refsynopsisdiv>
@@ -356,6 +357,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>NULL_ON_ERROR</literal></term>
+    <listitem>
+     <para>
+      If an error occurs in datatype's input function of the one of
+      specified columns, set column value to <literal>NULL</literal>.
+      This option is allowed only in <command>COPY FROM</command>, and
+      only when using <literal>CSV</literal> format.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>ENCODING</literal></term>
     <listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index db4c9dbc23..b25d737c49 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -539,6 +539,19 @@ ProcessCopyOptions(ParseState *pstate,
 								defel->defname),
 						 parser_errposition(pstate, defel->location)));
 		}
+		else if (strcmp(defel->defname, "null_on_error") == 0)
+		{
+			if (opts_out->null_on_error)
+				errorConflictingDefElem(defel, pstate);
+			if (defel->arg && IsA(defel->arg, List))
+				opts_out->null_on_error = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
 		else if (strcmp(defel->defname, "encoding") == 0)
 		{
 			if (opts_out->file_encoding >= 0)
@@ -701,6 +714,13 @@ ProcessCopyOptions(ParseState *pstate,
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("CSV quote character must not appear in the NULL specification")));
+
+	/* Check null_on_error */
+	if (opts_out->null_on_error != NIL && !is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY null on error only available using COPY FROM")));
+
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index a079c70152..f2a0bd599a 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1349,6 +1349,7 @@ BeginCopyFrom(ParseState *pstate,
 	Oid			in_func_oid;
 	int		   *defmap;
 	ExprState **defexprs;
+	bool	   *null_on_error_flags;
 	MemoryContext oldcontext;
 	bool		volatile_defexprs;
 	const int	progress_cols[] = {
@@ -1460,6 +1461,33 @@ BeginCopyFrom(ParseState *pstate,
 		}
 	}
 
+	/* Convert NULL_ON_ERROR name list to per-column flags, check validity */
+	null_on_error_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	cstate->null_on_error = null_on_error_flags;
+	if (cstate->opts.null_on_error)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.null_on_error);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("NULL_ON_ERROR column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			null_on_error_flags[attnum - 1] = true;
+		}
+		cstate->io_context = makeNode(IOCallContext);
+		cstate->io_context->no_error_throw = true;
+	}
+
 	/* Use client encoding when ENCODING option is not specified. */
 	if (cstate->opts.file_encoding < 0)
 		cstate->file_encoding = pg_get_client_encoding();
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 097414ef12..9c82ae1d09 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -897,6 +897,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			int			attnum = lfirst_int(cur);
 			int			m = attnum - 1;
 			Form_pg_attribute att = TupleDescAttr(tupDesc, m);
+			bool null_on_error = cstate->null_on_error[m];
 
 			if (fieldno >= fldct)
 				ereport(ERROR,
@@ -938,12 +939,31 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 
 			cstate->cur_attname = NameStr(att->attname);
 			cstate->cur_attval = string;
-			values[m] = InputFunctionCall(&in_functions[m],
-										  string,
-										  typioparams[m],
-										  att->atttypmod);
-			if (string != NULL)
-				nulls[m] = false;
+
+			if (null_on_error)
+			{
+				IOCallContext *ioc = cstate->io_context;
+				ioc->error_found = false;
+				values[m] = InputFunctionCallContext(&in_functions[m],
+													 string,
+													 typioparams[m],
+													 att->atttypmod,
+													 ioc);
+				if (ioc->error_found)
+					nulls[m] = true;
+				else if (string != NULL)
+					nulls[m] = false;
+			}
+			else
+			{
+				values[m] = InputFunctionCall(&in_functions[m],
+											  string,
+											  typioparams[m],
+											  att->atttypmod);
+				if (string != NULL)
+					nulls[m] = false;
+			}
+
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
 		}
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b77b935005..de2198dc09 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -59,6 +59,7 @@ typedef struct CopyFormatOptions
 	bool	   *force_null_flags;	/* per-column CSV FN flags */
 	bool		convert_selectively;	/* do selective binary conversion? */
 	List	   *convert_select; /* list of column names (can be NIL) */
+	List	   *null_on_error;	/* list of column names */
 } CopyFormatOptions;
 
 /* These are private in commands/copy[from|to].c */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 8d9cc5accd..8ad532997d 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/primnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -74,6 +75,7 @@ typedef struct CopyFromStateData
 	char	   *filename;		/* filename, or NULL for STDIN */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	copy_data_source_cb data_source_cb; /* function for reading data */
+	bool       *null_on_error;  /* which attnums to set null on input error */
 
 	CopyFormatOptions opts;
 	bool	   *convert_select_flags;	/* per-column CSV/TEXT CS flags */
@@ -93,6 +95,7 @@ typedef struct CopyFromStateData
 
 	AttrNumber	num_defaults;
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
+	IOCallContext *io_context;  /* used for null_on_error input*/
 	Oid		   *typioparams;	/* array of element types for in_functions */
 	int		   *defmap;			/* array of default att numbers */
 	ExprState **defexprs;		/* array of default att expressions */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 3fad1c52d1..96e51101ce 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -240,3 +240,16 @@ SELECT * FROM header_copytest ORDER BY a;
 (5 rows)
 
 drop table header_copytest;
+create table null_on_error_copytest(i int, b bool, t tsvector);
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+ERROR:  syntax error in tsvector: "b:c"
+CONTEXT:  COPY null_on_error_copytest, line 2, column t: "b:c"
+select * from null_on_error_copytest;
+ i | b |  t  
+---+---+-----
+ 1 |   | 'a'
+   | t | 'b'
+(2 rows)
+
+drop table null_on_error_copytest;
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 285022e07c..7a895c2ba1 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -268,3 +268,15 @@ a	c	b
 
 SELECT * FROM header_copytest ORDER BY a;
 drop table header_copytest;
+
+create table null_on_error_copytest(i int, b bool, t tsvector);
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+1	a 	a
+err	1 	b
+\.
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+2	a 	a
+3	1 	b:c
+\.
+select * from null_on_error_copytest;
+drop table null_on_error_copytest;
-- 
2.34.1

Reply via email to