On 2023-02-06 15:00, Tom Lane wrote:
Andres Freund <and...@anarazel.de> writes:
On February 5, 2023 9:12:17 PM PST, Tom Lane <t...@sss.pgh.pa.us> wrote:
Damir Belyalov <dam.be...@gmail.com> writes:
InputFunctionCallSafe() is good for detecting errors from input-functions but there are such errors from NextCopyFrom () that can not be detected with InputFunctionCallSafe(), e.g. "wrong number of columns in row''.

If you want to deal with those, then there's more work to be done to make those bits non-error-throwing. But there's a very finite amount of code
involved and no obvious reason why it couldn't be done.

I'm not even sure it makes sense to avoid that kind of error. And
invalid column count or such is something quite different than failing
some data type input routine, or falling a constraint.

I think it could be reasonable to put COPY's overall-line-format
requirements on the same level as datatype input format violations.
I agree that trying to trap every kind of error is a bad idea,
for largely the same reason that the soft-input-errors patches
only trap certain kinds of errors: it's too hard to tell whether
an error is an "internal" error that it's scary to continue past.

Is it a bad idea to limit the scope of allowing errors to 'soft' errors in InputFunctionCallSafe()?

I think it could be still useful for some usecases.

diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql

  +-- tests for IGNORE_DATATYPE_ERRORS option
  +CREATE TABLE check_ign_err (n int, m int[], k int);
  +COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
  +1  {1} 1
  +a  {2} 2
  +3  {3} 3333333333
  +4  {a, 4}  4
  +
  +5  {5} 5
  +\.
  +SELECT * FROM check_ign_err;

diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
  index 090ef6c7a8..08e8056fc1 100644

  +-- tests for IGNORE_DATATYPE_ERRORS option
  +CREATE TABLE check_ign_err (n int, m int[], k int);
  +COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
  +WARNING:  invalid input syntax for type integer: "a"
  +WARNING:  value "3333333333" is out of range for type integer
  +WARNING:  invalid input syntax for type integer: "a"
  +WARNING:  invalid input syntax for type integer: ""
  +SELECT * FROM check_ign_err;
  + n |  m  | k
  +---+-----+---
  + 1 | {1} | 1
  + 5 | {5} | 5
  +(2 rows)

--
Regards,

--
Atsushi Torikoshi
NTT DATA CORPORATION
From 16877d4cdd64db5f85bed9cd559e618d8211e598 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 27 Feb 2023 12:02:16 +0900
Subject: [PATCH v1] Add COPY option IGNORE_DATATYPE_ERRORS

---
 src/backend/commands/copy.c              |  8 ++++++++
 src/backend/commands/copyfrom.c          | 11 +++++++++++
 src/backend/commands/copyfromparse.c     | 12 ++++++++++--
 src/backend/parser/gram.y                |  8 +++++++-
 src/bin/psql/tab-complete.c              |  3 ++-
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  2 ++
 src/include/parser/kwlist.h              |  1 +
 src/test/regress/expected/copy2.out      | 14 ++++++++++++++
 src/test/regress/sql/copy2.sql           | 12 ++++++++++++
 10 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index e34f583ea7..2f1cfb3f4d 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -410,6 +410,7 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		format_specified = false;
 	bool		freeze_specified = false;
 	bool		header_specified = false;
+	bool		ignore_datatype_errors_specified= false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -449,6 +450,13 @@ ProcessCopyOptions(ParseState *pstate,
 			freeze_specified = true;
 			opts_out->freeze = defGetBoolean(defel);
 		}
+		else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+		{
+			if (ignore_datatype_errors_specified)
+				errorConflictingDefElem(defel, pstate);
+			ignore_datatype_errors_specified= true;
+			opts_out->ignore_datatype_errors = defGetBoolean(defel);
+		}
 		else if (strcmp(defel->defname, "delimiter") == 0)
 		{
 			if (opts_out->delim)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index af52faca6d..24eec6a27d 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -959,6 +959,7 @@ CopyFrom(CopyFromState cstate)
 	{
 		TupleTableSlot *myslot;
 		bool		skip_tuple;
+		ErrorSaveContext escontext = {T_ErrorSaveContext};
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -991,10 +992,20 @@ CopyFrom(CopyFromState cstate)
 
 		ExecClearTuple(myslot);
 
+		if (cstate->opts.ignore_datatype_errors)
+		{
+			escontext.details_wanted = true;
+			cstate->escontext = escontext;
+		}
+
 		/* Directly store the values/nulls array in the slot */
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
 			break;
 
+		/* Soft error occured, skip this tuple */
+		if(cstate->escontext.error_occurred)
+			continue;
+
 		ExecStoreVirtualTuple(myslot);
 
 		/*
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 91b564c2bc..12b1780fd6 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "nodes/miscnodes.h"
 #include "pgstat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
@@ -938,10 +939,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 
 			cstate->cur_attname = NameStr(att->attname);
 			cstate->cur_attval = string;
-			values[m] = InputFunctionCall(&in_functions[m],
+			if (!InputFunctionCallSafe(&in_functions[m],
 										  string,
 										  typioparams[m],
-										  att->atttypmod);
+										  att->atttypmod,
+										  (Node *) &cstate->escontext,
+										  &values[m]))
+			{
+					ereport(WARNING,
+							  errmsg("%s", cstate->escontext.error_data->message));
+					return true;
+			}
 			if (string != NULL)
 				nulls[m] = false;
 			cstate->cur_attname = NULL;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..d79d293c0d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -701,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 	HANDLER HAVING HEADER_P HOLD HOUR_P
 
-	IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
+	IDENTITY_P IF_P IGNORE_DATATYPE_ERRORS ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
 	INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
 	INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
 	INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -3378,6 +3378,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("freeze", (Node *) makeBoolean(true), @1);
 				}
+			| IGNORE_DATATYPE_ERRORS
+				{
+					$$ = makeDefElem("ignore_datatype_errors", (Node *)makeBoolean(true), @1);
+				}
 			| DELIMITER opt_as Sconst
 				{
 					$$ = makeDefElem("delimiter", (Node *) makeString($3), @1);
@@ -16821,6 +16825,7 @@ unreserved_keyword:
 			| HOUR_P
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| IMMEDIATE
 			| IMMUTABLE
 			| IMPLICIT_P
@@ -17375,6 +17380,7 @@ bare_label_keyword:
 			| HOLD
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| ILIKE
 			| IMMEDIATE
 			| IMMUTABLE
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 5e1882eaea..a363351d3b 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2857,7 +2857,8 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
-					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING");
+					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING",
+					  "IGNORE_DATATYPE_ERRORS");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 8e5f6ff148..a7eb0f8883 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
 								 * -1 if not specified */
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
+	bool		ignore_datatype_errors;  /* ignore rows with datatype errors */
 	bool		csv_mode;		/* Comma Separated Value format? */
 	CopyHeaderChoice header_line;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 7b1c4327bd..d74c633481 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/miscnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -94,6 +95,7 @@ typedef struct CopyFromStateData
 	AttrNumber	num_defaults;
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
 	Oid		   *typioparams;	/* array of element types for in_functions */
+	ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
 	int		   *defmap;			/* array of default att numbers */
 	ExprState **defexprs;		/* array of default att expressions */
 	bool		volatile_defexprs;	/* is any of defexprs volatile? */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..1d7f9efbc0 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -196,6 +196,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL)
 PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("ignore_datatype_errors", IGNORE_DATATYPE_ERRORS, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 090ef6c7a8..08e8056fc1 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -666,6 +666,20 @@ SELECT * FROM instead_of_insert_tbl;
 (2 rows)
 
 COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  value "3333333333" is out of range for type integer
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  invalid input syntax for type integer: ""
+SELECT * FROM check_ign_err;
+ n |  m  | k 
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index b0de82c3aa..380adfce96 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -464,6 +464,18 @@ test1
 SELECT * FROM instead_of_insert_tbl;
 COMMIT;
 
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+1	{1}	1
+a	{2}	2
+3	{3}	3333333333
+4	{a, 4}	4
+
+5	{5}	5
+\.
+SELECT * FROM check_ign_err;
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.25.1

Reply via email to