On 31.08.2022 20:14, Tom Lane wrote:
Robert Haas<robertmh...@gmail.com> writes:
On Wed, Aug 31, 2022 at 1:06 PM Tom Lane<t...@sss.pgh.pa.us> wrote:
The currently proposed patchset hacks up a relatively small number
of core datatypes to be able to do that. But it's just a hack
and there's no prospect of extension types being able to join
in the fun. I think where we need to start, for v16, is making
an API design that will let any datatype have this functionality.
(I don't say that we'd convert every datatype to do so right away;
in the long run we should, but I'm content to start with just the
same core types touched here.) Beside the JSON stuff, there is
another even more pressing application for such behavior, namely
the often-requested COPY functionality to be able to shunt bad data
off somewhere without losing the entire transfer. In the COPY case
I think we'd want to be able to capture the error message that
would have been issued, which means the current patches are not
at all appropriate as a basis for that API design: they're just
returning a bool without any details.
I would be in favor of making more of an effort than just a few token
data types. The initial patch could just touch a few, but once the
infrastructure is in place we should really make a sweep through the
tree and tidy up.
Sure, but my point is that we can do that in a time-extended fashion
rather than having a flag day where everything must be updated.
The initial patch just needs to update a few types as proof of concept.
And here is a quick POC patch with an example for COPY and float4:
=# CREATE TABLE test (i int, f float4);
CREATE TABLE
=# COPY test (f) FROM stdin WITH (null_on_error (f));
1
err
2
\.
COPY 3
=# SELECT f FROM test;
f
---
1
2
(3 rows)
=# COPY test (i) FROM stdin WITH (null_on_error (i));
ERROR: input function for datatype "integer" does not support error handling
PG_RETURN_ERROR() is a reincarnation of ereport_safe() macro for returning
ErrorData, which was present in older versions (~v18) of SQL/JSON patches.
Later it was replaced with `bool *have_error` and less magical
`if (have_error) ... else ereport(...)`.
Obviously, this needs a separate thread.
--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company
From 537e65e1ebcccdf4a760d3aeeeea743c88611e7c Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Wed, 31 Aug 2022 22:24:33 +0300
Subject: [PATCH 1/5] Introduce PG_RETURN_ERROR and FuncCallError node
---
src/include/nodes/execnodes.h | 7 +++++++
src/include/utils/elog.h | 25 +++++++++++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 01b1727fc09..b401d354656 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2729,4 +2729,11 @@ typedef struct LimitState
TupleTableSlot *last_slot; /* slot for evaluation of ties */
} LimitState;
+typedef struct FuncCallError
+{
+ NodeTag type;
+ ErrorData *error; /* place where function should put
+ * error data instead of throwing it */
+} FuncCallError;
+
#endif /* EXECNODES_H */
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 56398176901..5fd3deed61f 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -159,6 +159,31 @@
#define ereport(elevel, ...) \
ereport_domain(elevel, TEXTDOMAIN, __VA_ARGS__)
+/*
+ * PG_RETURN_ERROR() -- special macro for copying error info into the
+ * FuncCallError node, if it was as FunctionCallInfo.context,
+ * instead of throwing it with ordinary ereport().
+ *
+ * This is intended for handling of errors of categories like
+ * ERRCODE_DATA_EXCEPTION without PG_TRY/PG_CATCH and substransactions,
+ * but not for errors like ERRCODE_OUT_OF_MEMORY.
+ */
+#define PG_RETURN_ERROR(...) \
+ do { \
+ if (fcinfo->context && IsA(fcinfo->context, FuncCallError)) { \
+ pg_prevent_errno_in_scope(); \
+ \
+ errstart(ERROR, TEXTDOMAIN); \
+ (__VA_ARGS__); \
+ castNode(FuncCallError, fcinfo->context)->error = CopyErrorData(); \
+ FlushErrorState(); \
+ \
+ PG_RETURN_NULL(); \
+ } else { \
+ ereport(ERROR, (__VA_ARGS__)); \
+ } \
+ } while (0)
+
#define TEXTDOMAIN NULL
extern bool message_level_is_interesting(int elevel);
--
2.25.1
From 7831b2c571784a614dd4ee4b48730187b0c0a8d1 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Wed, 31 Aug 2022 23:02:06 +0300
Subject: [PATCH 2/5] Introduce pg_proc.proissafe and func_is_safe()
---
src/backend/utils/cache/lsyscache.c | 19 +++++++++++++++++++
src/include/catalog/pg_proc.h | 3 +++
src/include/utils/lsyscache.h | 1 +
3 files changed, 23 insertions(+)
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index a16a63f4957..ff3005077b2 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1830,6 +1830,25 @@ get_func_leakproof(Oid funcid)
return result;
}
+/*
+ * func_is_safe
+ * Given procedure id, return the function's proissafe flag.
+ */
+bool
+func_is_safe(Oid funcid)
+{
+ HeapTuple tp;
+ bool result;
+
+ tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for function %u", funcid);
+
+ result = ((Form_pg_proc) GETSTRUCT(tp))->proissafe;
+ ReleaseSysCache(tp);
+ return result;
+}
+
/*
* get_func_support
*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 76310d4cc9a..2ae8942b014 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -70,6 +70,9 @@ CATALOG(pg_proc,1255,ProcedureRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(81,Proce
/* returns a set? */
bool proretset BKI_DEFAULT(f);
+ /* can return error info through FuncCallError instead of throwing it? */
+ bool proissafe BKI_DEFAULT(f);
+
/* see PROVOLATILE_ categories below */
char provolatile BKI_DEFAULT(i);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 50f02883052..c66365fdf2c 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -130,6 +130,7 @@ extern char func_volatile(Oid funcid);
extern char func_parallel(Oid funcid);
extern char get_func_prokind(Oid funcid);
extern bool get_func_leakproof(Oid funcid);
+extern bool func_is_safe(Oid funcid);
extern RegProcedure get_func_support(Oid funcid);
extern Oid get_relname_relid(const char *relname, Oid relnamespace);
extern char *get_rel_name(Oid relid);
--
2.25.1
From 4eba0bd771b85abef94125f374261d7734178657 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Wed, 31 Aug 2022 22:28:09 +0300
Subject: [PATCH 3/5] Use PG_RETURN_ERROR in float4in()
---
src/backend/utils/adt/float.c | 9 +++++----
src/include/catalog/pg_proc.dat | 2 +-
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index fc8f39a7a98..e3bde72b328 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -25,6 +25,7 @@
#include "common/shortest_dec.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "nodes/execnodes.h"
#include "utils/array.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
@@ -183,7 +184,7 @@ float4in(PG_FUNCTION_ARGS)
* strtod() on different platforms.
*/
if (*num == '\0')
- ereport(ERROR,
+ PG_RETURN_ERROR(
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"real", orig_num)));
@@ -257,13 +258,13 @@ float4in(PG_FUNCTION_ARGS)
(val >= HUGE_VALF || val <= -HUGE_VALF)
#endif
)
- ereport(ERROR,
+ PG_RETURN_ERROR(
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("\"%s\" is out of range for type real",
orig_num)));
}
else
- ereport(ERROR,
+ PG_RETURN_ERROR(
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"real", orig_num)));
@@ -275,7 +276,7 @@ float4in(PG_FUNCTION_ARGS)
/* if there is any junk left at the end of the string, bail out */
if (*endptr != '\0')
- ereport(ERROR,
+ PG_RETURN_ERROR(
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"real", orig_num)));
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index be47583122b..b25bfa55ab3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -585,7 +585,7 @@
{ oid => '200', descr => 'I/O',
proname => 'float4in', prorettype => 'float4', proargtypes => 'cstring',
- prosrc => 'float4in' },
+ proissafe => 't', prosrc => 'float4in' },
{ oid => '201', descr => 'I/O',
proname => 'float4out', prorettype => 'cstring', proargtypes => 'float4',
prosrc => 'float4out' },
--
2.25.1
From b8001f469e83df048e972dcd17120929996ba5fa Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Wed, 31 Aug 2022 22:28:44 +0300
Subject: [PATCH 4/5] Introduce InputFunctionCallOptError()
---
src/backend/utils/fmgr/fmgr.c | 38 +++++++++++++++++++++++++++++++++--
src/include/fmgr.h | 3 +++
2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index a9dd068095b..fbea0df61df 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1511,11 +1511,14 @@ OidFunctionCall9Coll(Oid functionId, Oid collation, Datum arg1, Datum arg2,
* function anyway if it's not strict. So this is almost but not quite
* the same as FunctionCall3.
*/
-Datum
-InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
+static inline Datum
+InputFunctionCallInternal(FmgrInfo *flinfo, char *str,
+ Oid typioparam, int32 typmod,
+ ErrorData **edata)
{
LOCAL_FCINFO(fcinfo, 3);
Datum result;
+ FuncCallError *fcerror;
if (str == NULL && flinfo->fn_strict)
return (Datum) 0; /* just return null result */
@@ -1529,8 +1532,22 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
fcinfo->args[2].value = Int32GetDatum(typmod);
fcinfo->args[2].isnull = false;
+ if (edata)
+ {
+ fcerror = makeNode(FuncCallError);
+ fcinfo->context = (Node *) fcerror;
+ }
+
result = FunctionCallInvoke(fcinfo);
+ if (edata)
+ {
+ *edata = fcerror->error;
+ pfree(fcerror);
+ if (*edata)
+ return (Datum) 0;
+ }
+
/* Should get null result if and only if str is NULL */
if (str == NULL)
{
@@ -1548,6 +1565,23 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
return result;
}
+Datum
+InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
+{
+ return InputFunctionCallInternal(flinfo, str, typioparam, typmod, NULL);
+}
+
+Datum
+InputFunctionCallOptError(FmgrInfo *flinfo, char *str, Oid typioparam,
+ int32 typmod, ErrorData **edata)
+{
+ if (edata)
+ return InputFunctionCallInternal(flinfo, str, typioparam, typmod, edata);
+ else
+ return InputFunctionCall(flinfo, str, typioparam, typmod);
+}
+
+
/*
* Call a previously-looked-up datatype output function.
*
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index 380a82b9de3..8f18f2408e1 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -711,6 +711,9 @@ extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf,
extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val);
extern bytea *OidSendFunctionCall(Oid functionId, Datum val);
+extern Datum InputFunctionCallOptError(FmgrInfo *flinfo, char *str,
+ Oid typioparam, int32 typmod,
+ ErrorData **edata);
/*
* Routines in fmgr.c
--
2.25.1
From 1fc1b4231b079078c303be75864c4ef71d0742a1 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Wed, 31 Aug 2022 22:29:47 +0300
Subject: [PATCH 5/5] Introduce NULL_ON_ERROR option for COPY FROM
---
src/backend/commands/copy.c | 20 +++++++++++++++++
src/backend/commands/copyfrom.c | 33 ++++++++++++++++++++++++++++
src/backend/commands/copyfromparse.c | 21 +++++++++++++-----
src/include/commands/copy.h | 2 ++
src/test/regress/expected/copy.out | 15 +++++++++++++
src/test/regress/sql/copy.sql | 13 +++++++++++
6 files changed, 99 insertions(+), 5 deletions(-)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 49924e476af..ce7e875f655 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -551,6 +551,19 @@ ProcessCopyOptions(ParseState *pstate,
defel->defname),
parser_errposition(pstate, defel->location)));
}
+ else if (strcmp(defel->defname, "null_on_error") == 0)
+ {
+ if (opts_out->null_on_error)
+ errorConflictingDefElem(defel, pstate);
+ if (defel->arg && IsA(defel->arg, List))
+ opts_out->null_on_error = castNode(List, defel->arg);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument to option \"%s\" must be a list of column names",
+ defel->defname),
+ parser_errposition(pstate, defel->location)));
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -701,6 +714,13 @@ ProcessCopyOptions(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
+
+ /* Check null_on_error */
+ if (opts_out->null_on_error != NIL && !is_from)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("COPY null on error only available using COPY FROM")));
+
}
/*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index e8bb168aea8..99b39d8d7c2 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -47,6 +47,7 @@
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/portal.h"
@@ -1316,6 +1317,29 @@ BeginCopyFrom(ParseState *pstate,
}
}
+ /* Convert NULL_ON_ERROR name list to per-column flags, check validity */
+ cstate->opts.null_on_error_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+ if (cstate->opts.null_on_error)
+ {
+ List *attnums;
+ ListCell *cur;
+
+ attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.null_on_error);
+
+ foreach(cur, attnums)
+ {
+ int attnum = lfirst_int(cur);
+ Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+ if (!list_member_int(cstate->attnumlist, attnum))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("NULL_ON_ERROR column \"%s\" not referenced by COPY",
+ NameStr(attr->attname))));
+ cstate->opts.null_on_error_flags[attnum - 1] = true;
+ }
+ }
+
/* Use client encoding when ENCODING option is not specified. */
if (cstate->opts.file_encoding < 0)
cstate->file_encoding = pg_get_client_encoding();
@@ -1417,6 +1441,15 @@ BeginCopyFrom(ParseState *pstate,
&in_func_oid, &typioparams[attnum - 1]);
fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+ /* check whether input function supports returning errors */
+ if (cstate->opts.null_on_error_flags[attnum - 1] &&
+ !func_is_safe(in_func_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("input function for datatype \"%s\" does not support error handling",
+ format_type_be(att->atttypid))));
+
+
/* Get default info if needed */
if (!list_member_int(cstate->attnumlist, attnum) && !att->attgenerated)
{
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 097414ef12d..04a36de444c 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -897,6 +897,8 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
int attnum = lfirst_int(cur);
int m = attnum - 1;
Form_pg_attribute att = TupleDescAttr(tupDesc, m);
+ ErrorData *edata = NULL;
+ ErrorData **p_edata;
if (fieldno >= fldct)
ereport(ERROR,
@@ -938,11 +940,20 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
cstate->cur_attname = NameStr(att->attname);
cstate->cur_attval = string;
- values[m] = InputFunctionCall(&in_functions[m],
- string,
- typioparams[m],
- att->atttypmod);
- if (string != NULL)
+
+ p_edata = cstate->opts.null_on_error_flags[m] ? &edata : NULL;
+
+ values[m] = InputFunctionCallOptError(&in_functions[m],
+ string,
+ typioparams[m],
+ att->atttypmod,
+ p_edata);
+ if (edata)
+ {
+ FreeErrorData(edata);
+ nulls[m] = true;
+ }
+ else if (string != NULL)
nulls[m] = false;
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index cb0096aeb67..359fb4cc9e0 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -59,6 +59,8 @@ typedef struct CopyFormatOptions
bool *force_null_flags; /* per-column CSV FN flags */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
+ List *null_on_error; /* list of column names */
+ bool *null_on_error_flags; /* per-column NOE flags */
} CopyFormatOptions;
/* These are private in commands/copy[from|to].c */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 3fad1c52d1f..82839d2f67a 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -240,3 +240,18 @@ SELECT * FROM header_copytest ORDER BY a;
(5 rows)
drop table header_copytest;
+create table null_on_error_copytest(i int, f float4);
+copy null_on_error_copytest from stdin with (null_on_error (i, f));
+ERROR: input function for datatype "integer" does not support error handling
+copy null_on_error_copytest from stdin with (null_on_error (f));
+copy null_on_error_copytest from stdin with (null_on_error (f));
+ERROR: invalid input syntax for type integer: "err"
+CONTEXT: COPY null_on_error_copytest, line 2, column i: "err"
+select * from null_on_error_copytest;
+ i | f
+---+-----
+ 1 | 2.3
+ 2 |
+(2 rows)
+
+drop table null_on_error_copytest;
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 285022e07c6..914e29d5ba1 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -268,3 +268,16 @@ a c b
SELECT * FROM header_copytest ORDER BY a;
drop table header_copytest;
+
+create table null_on_error_copytest(i int, f float4);
+copy null_on_error_copytest from stdin with (null_on_error (i, f));
+copy null_on_error_copytest from stdin with (null_on_error (f));
+1 2.3
+2 err
+\.
+copy null_on_error_copytest from stdin with (null_on_error (f));
+1 2.3
+err 4.5
+\.
+select * from null_on_error_copytest;
+drop table null_on_error_copytest;
--
2.25.1