On 2022-11-21 Mo 00:26, Tom Lane wrote: > Corey Huinker <corey.huin...@gmail.com> writes: >> On Tue, Nov 15, 2022 at 11:36 AM Andrew Dunstan <and...@dunslane.net> wrote: >>> Nikita, >>> just checking in, are you making progress on this? I think we really >>> need to get this reviewed and committed ASAP if we are to have a chance >>> to get the SQL/JSON stuff reworked to use it in time for release 16. >> I'm making an attempt at this or something very similar to it. I don't yet >> have a patch ready. > Cool. We can't delay too much longer on this if we want to have > a credible feature in v16. Although I want a minimal initial > patch, there will still be a ton of incremental work to do after > the core capability is reviewed and committed, so there's no > time to lose. > >
OK, here's a set of minimal patches based on Nikita's earlier work and also some work by my colleague Amul Sul. It tries to follow Tom's original outline at [1], and do as little else as possible. Patch 1 introduces the IOCallContext node. The caller should set the no_error_throw flag and clear the error_found flag, which will be set by a conforming IO function if an error is found. It also includes a string field for an error message. I haven't used that, it's more there to stimulate discussion. Robert suggested to me that maybe it should be an ErrorData, but I'm not sure how we would use it. Patch 2 introduces InputFunctionCallContext(), which is similar to InputFunctionCall() but with an extra context parameter. Note that it's ok for an input function to return a NULL to this function if an error is found. Patches 3 and 4 modify the bool_in() and int4in() functions respectively to handle an IOContextCall appropriately if provided one in their fcinfo.context. Patch 5 introduces COPY FROM ... NULL_ON_ERROR which, in addition to being useful in itself, provides a test of the previous patches. I hope we can get a fairly quick agreement so that work can being on adjusting at least those things needed for the SQL/JSON patches ASAP. Our goal should be to adjust all the core input functions, but that's not quite so urgent, and can be completed in parallel with the SQL/JSON work. cheers andrew [1] https://www.postgresql.org/message-id/13351.1661965592%40sss.pgh.pa.us -- Andrew Dunstan EDB: https://www.enterprisedb.com
From e84cf9461fe3898c0326e7c369c65fc4ef1761c6 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan <and...@dunslane.net> Date: Wed, 30 Nov 2022 10:44:27 -0500 Subject: [PATCH 1/5] Add IOCallContext node --- src/include/nodes/primnodes.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 74f228d959..3e2ecd11b3 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1692,4 +1692,21 @@ typedef struct OnConflictExpr List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } OnConflictExpr; +/*---------- + * IOCallContext - passed to an input function as FunctionCallInfo.context + * + * if the field no_error_throw is set then a conforming IO function will + * set the field error_found on error rather than calling ereport(ERROR ...) + * + */ +typedef struct IOCallContext +{ + pg_node_attr(nodetag_only) /* don't need copy etc. support functions */ + + NodeTag type; + bool no_error_throw; /* set by caller to suppress errors */ + bool error_found; /* cleared by caller, set by IO function */ + char *error_message; +} IOCallContext; + #endif /* PRIMNODES_H */ -- 2.34.1
From b6b277f5dd3837ab9ac2df819f4e67f39d790d1f Mon Sep 17 00:00:00 2001 From: Andrew Dunstan <and...@dunslane.net> Date: Wed, 30 Nov 2022 10:46:03 -0500 Subject: [PATCH 2/5] Add InputFunctionCallContext() --- src/backend/utils/fmgr/fmgr.c | 50 +++++++++++++++++++++++++++++++++++ src/include/fmgr.h | 3 +++ 2 files changed, 53 insertions(+) diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 3c210297aa..bd91ae1dac 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -24,6 +24,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/primnodes.h" #include "pgstat.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -1548,6 +1549,55 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod) return result; } +Datum +InputFunctionCallContext(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + void * ctxt) +{ + LOCAL_FCINFO(fcinfo, 3); + Datum result; + + if (str == NULL && flinfo->fn_strict) + return (Datum) 0; /* just return null result */ + + InitFunctionCallInfoData(*fcinfo, flinfo, 3, InvalidOid, NULL, NULL); + + fcinfo->args[0].value = CStringGetDatum(str); + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = Int32GetDatum(typmod); + fcinfo->args[2].isnull = false; + + fcinfo->context = ctxt; + + result = FunctionCallInvoke(fcinfo); + + /* NULL is OK if we found an error and expect the caller to handle it */ + if (ctxt && IsA(ctxt, IOCallContext)) + { + IOCallContext *ioc = (IOCallContext *) ctxt; + if (ioc->no_error_throw && ioc->error_found) + return result; + } + + /* Otherwise should get null result if and only if str is NULL */ + if (str == NULL) + { + if (!fcinfo->isnull) + elog(ERROR, "input function %u returned non-NULL", + flinfo->fn_oid); + } + else + { + if (fcinfo->isnull) + elog(ERROR, "input function %u returned NULL", + flinfo->fn_oid); + } + + return result; +} + /* * Call a previously-looked-up datatype output function. * diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 380a82b9de..4d7cb34441 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -702,6 +702,9 @@ extern Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod); extern Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod); +extern Datum InputFunctionCallContext(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + void * ctxt); extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val); extern char *OidOutputFunctionCall(Oid functionId, Datum val); extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf, -- 2.34.1
From ff2164dd77fde04b961f4bbbc9a1e4d9830733ff Mon Sep 17 00:00:00 2001 From: Andrew Dunstan <and...@dunslane.net> Date: Wed, 30 Nov 2022 11:32:30 -0500 Subject: [PATCH 3/5] Add safe error handling to bool_in --- src/backend/utils/adt/bool.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index cd7335287f..d4ca519c9b 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -18,6 +18,8 @@ #include <ctype.h> #include "libpq/pqformat.h" +#include "nodes/nodes.h" +#include "nodes/primnodes.h" #include "utils/builtins.h" /* @@ -148,6 +150,16 @@ boolin(PG_FUNCTION_ARGS) if (parse_bool_with_len(str, len, &result)) PG_RETURN_BOOL(result); + if (fcinfo->context && IsA(fcinfo->context, IOCallContext)) + { + IOCallContext *ioc = (IOCallContext *) fcinfo->context; + if (ioc->no_error_throw) + { + ioc->error_found = true; + PG_RETURN_BOOL(false); + } + } + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", -- 2.34.1
From 77abcdc861c24e1c41e8ef82ea89fa55d0149934 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan <and...@dunslane.net> Date: Wed, 30 Nov 2022 14:19:08 -0500 Subject: [PATCH 4/5] Add error safety to int4in --- src/backend/utils/adt/int.c | 2 +- src/backend/utils/adt/numutils.c | 28 +++++++++++++++++++++++++++- src/include/utils/builtins.h | 1 + 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 42ddae99ef..719a17498d 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -291,7 +291,7 @@ int4in(PG_FUNCTION_ARGS) { char *num = PG_GETARG_CSTRING(0); - PG_RETURN_INT32(pg_strtoint32(num)); + PG_RETURN_INT32(pg_strtoint32_context(num, fcinfo->context)); } /* diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 834ec0b588..f6d9a75cd8 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -19,6 +19,7 @@ #include <ctype.h> #include "common/int.h" +#include "nodes/primnodes.h" #include "utils/builtins.h" #include "port/pg_bitutils.h" @@ -171,8 +172,13 @@ invalid_syntax: * representation of the most negative number, which can't be represented as a * positive number. */ +int32 pg_strtoint32(const char *s) +{ + return pg_strtoint32_context(s, NULL); +} + int32 -pg_strtoint32(const char *s) +pg_strtoint32_context(const char *s, void * context) { const char *ptr = s; int32 tmp = 0; @@ -223,12 +229,32 @@ pg_strtoint32(const char *s) return tmp; out_of_range: + if (context && IsA(context, IOCallContext)) + { + IOCallContext *ioc = (IOCallContext *) context; + if (ioc->no_error_throw) + { + ioc->error_found = true; + return 0; + } + } + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value \"%s\" is out of range for type %s", s, "integer"))); invalid_syntax: + if (context && IsA(context, IOCallContext)) + { + IOCallContext *ioc = (IOCallContext *) context; + if (ioc->no_error_throw) + { + ioc->error_found = true; + return 0; + } + } + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 81631f1645..eb24716a77 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -45,6 +45,7 @@ extern int namestrcmp(Name name, const char *str); /* numutils.c */ extern int16 pg_strtoint16(const char *s); extern int32 pg_strtoint32(const char *s); +extern int32 pg_strtoint32_context(const char *s, void *); extern int64 pg_strtoint64(const char *s); extern int pg_itoa(int16 i, char *a); extern int pg_ultoa_n(uint32 value, char *a); -- 2.34.1
From a8b0161d6ddbdf7fc636ac4c6fc189e38b3ec08d Mon Sep 17 00:00:00 2001 From: Andrew Dunstan <and...@dunslane.net> Date: Wed, 30 Nov 2022 13:44:55 -0500 Subject: [PATCH 5/5] Add COPY FROM ... NULL ON ERROR ... --- doc/src/sgml/ref/copy.sgml | 13 ++++++++++ src/backend/commands/copy.c | 20 +++++++++++++++ src/backend/commands/copyfrom.c | 28 +++++++++++++++++++++ src/backend/commands/copyfromparse.c | 32 +++++++++++++++++++----- src/include/commands/copy.h | 1 + src/include/commands/copyfrom_internal.h | 3 +++ src/test/regress/expected/copy.out | 13 ++++++++++ src/test/regress/sql/copy.sql | 12 +++++++++ 8 files changed, 116 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index c25b52d0cb..f660ec6599 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -42,6 +42,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) + NULL_ON_ERROR ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ENCODING '<replaceable class="parameter">encoding_name</replaceable>' </synopsis> </refsynopsisdiv> @@ -356,6 +357,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>NULL_ON_ERROR</literal></term> + <listitem> + <para> + If an error occurs in datatype's input function of the one of + specified columns, set column value to <literal>NULL</literal>. + This option is allowed only in <command>COPY FROM</command>, and + only when using <literal>CSV</literal> format. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>ENCODING</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index db4c9dbc23..b25d737c49 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -539,6 +539,19 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "null_on_error") == 0) + { + if (opts_out->null_on_error) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, List)) + opts_out->null_on_error = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } else if (strcmp(defel->defname, "encoding") == 0) { if (opts_out->file_encoding >= 0) @@ -701,6 +714,13 @@ ProcessCopyOptions(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CSV quote character must not appear in the NULL specification"))); + + /* Check null_on_error */ + if (opts_out->null_on_error != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY null on error only available using COPY FROM"))); + } /* diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index a079c70152..f2a0bd599a 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1349,6 +1349,7 @@ BeginCopyFrom(ParseState *pstate, Oid in_func_oid; int *defmap; ExprState **defexprs; + bool *null_on_error_flags; MemoryContext oldcontext; bool volatile_defexprs; const int progress_cols[] = { @@ -1460,6 +1461,33 @@ BeginCopyFrom(ParseState *pstate, } } + /* Convert NULL_ON_ERROR name list to per-column flags, check validity */ + null_on_error_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + cstate->null_on_error = null_on_error_flags; + if (cstate->opts.null_on_error) + { + List *attnums; + ListCell *cur; + + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.null_on_error); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("NULL_ON_ERROR column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + null_on_error_flags[attnum - 1] = true; + } + cstate->io_context = makeNode(IOCallContext); + cstate->io_context->no_error_throw = true; + } + /* Use client encoding when ENCODING option is not specified. */ if (cstate->opts.file_encoding < 0) cstate->file_encoding = pg_get_client_encoding(); diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 097414ef12..9c82ae1d09 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -897,6 +897,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, int attnum = lfirst_int(cur); int m = attnum - 1; Form_pg_attribute att = TupleDescAttr(tupDesc, m); + bool null_on_error = cstate->null_on_error[m]; if (fieldno >= fldct) ereport(ERROR, @@ -938,12 +939,31 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, cstate->cur_attname = NameStr(att->attname); cstate->cur_attval = string; - values[m] = InputFunctionCall(&in_functions[m], - string, - typioparams[m], - att->atttypmod); - if (string != NULL) - nulls[m] = false; + + if (null_on_error) + { + IOCallContext *ioc = cstate->io_context; + ioc->error_found = false; + values[m] = InputFunctionCallContext(&in_functions[m], + string, + typioparams[m], + att->atttypmod, + ioc); + if (ioc->error_found) + nulls[m] = true; + else if (string != NULL) + nulls[m] = false; + } + else + { + values[m] = InputFunctionCall(&in_functions[m], + string, + typioparams[m], + att->atttypmod); + if (string != NULL) + nulls[m] = false; + } + cstate->cur_attname = NULL; cstate->cur_attval = NULL; } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b77b935005..de2198dc09 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -59,6 +59,7 @@ typedef struct CopyFormatOptions bool *force_null_flags; /* per-column CSV FN flags */ bool convert_selectively; /* do selective binary conversion? */ List *convert_select; /* list of column names (can be NIL) */ + List *null_on_error; /* list of column names */ } CopyFormatOptions; /* These are private in commands/copy[from|to].c */ diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 8d9cc5accd..8ad532997d 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -16,6 +16,7 @@ #include "commands/copy.h" #include "commands/trigger.h" +#include "nodes/primnodes.h" /* * Represents the different source cases we need to worry about at @@ -74,6 +75,7 @@ typedef struct CopyFromStateData char *filename; /* filename, or NULL for STDIN */ bool is_program; /* is 'filename' a program to popen? */ copy_data_source_cb data_source_cb; /* function for reading data */ + bool *null_on_error; /* which attnums to set null on input error */ CopyFormatOptions opts; bool *convert_select_flags; /* per-column CSV/TEXT CS flags */ @@ -93,6 +95,7 @@ typedef struct CopyFromStateData AttrNumber num_defaults; FmgrInfo *in_functions; /* array of input functions for each attrs */ + IOCallContext *io_context; /* used for null_on_error input*/ Oid *typioparams; /* array of element types for in_functions */ int *defmap; /* array of default att numbers */ ExprState **defexprs; /* array of default att expressions */ diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 3fad1c52d1..96e51101ce 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -240,3 +240,16 @@ SELECT * FROM header_copytest ORDER BY a; (5 rows) drop table header_copytest; +create table null_on_error_copytest(i int, b bool, t tsvector); +copy null_on_error_copytest from stdin with (null_on_error(i, b, t)); +copy null_on_error_copytest from stdin with (null_on_error(i, b, t)); +ERROR: syntax error in tsvector: "b:c" +CONTEXT: COPY null_on_error_copytest, line 2, column t: "b:c" +select * from null_on_error_copytest; + i | b | t +---+---+----- + 1 | | 'a' + | t | 'b' +(2 rows) + +drop table null_on_error_copytest; diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 285022e07c..7a895c2ba1 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -268,3 +268,15 @@ a c b SELECT * FROM header_copytest ORDER BY a; drop table header_copytest; + +create table null_on_error_copytest(i int, b bool, t tsvector); +copy null_on_error_copytest from stdin with (null_on_error(i, b, t)); +1 a a +err 1 b +\. +copy null_on_error_copytest from stdin with (null_on_error(i, b, t)); +2 a a +3 1 b:c +\. +select * from null_on_error_copytest; +drop table null_on_error_copytest; -- 2.34.1