On 2023-03-17 21:23, torikoshia wrote:
On 2023-03-07 18:09, Daniel Gustafsson wrote:
On 7 Mar 2023, at 09:35, Damir Belyalov <dam.be...@gmail.com> wrote:

I felt just logging "Error: %ld" would make people wonder the meaning of
the %ld. Logging something like ""Error: %ld data type errors were
found" might be clearer.

Thanks. For more clearance change the message to: "Errors were found: %".

I'm not convinced that this adds enough clarity to assist the user. We also shouldn't use "error" in a WARNING log since the user has explicitly asked to
skip rows on error, so it's not an error per se.
+1

How about something like:

  ereport(WARNING,
          (errmsg("%ld rows were skipped due to data type
incompatibility", cstate->ignored_errors),
           errhint("Skipped rows can be inspected in the database log
for reprocessing.")));
Since skipped rows cannot be inspected in the log when
log_error_verbosity is set to terse,
it might be better without this errhint.

Removed errhint.

Modified some codes since v3 couldn't be applied HEAD anymore.

Also modified v3 patch as below:

65 +   if (cstate->opts.ignore_datatype_errors)
66 +       cstate->ignored_errors = 0;
67 +

It seems not necessary since cstate is initialized by palloc0() in BeginCopyFrom().

134 +               ereport(LOG,
135 + errmsg("%s", cstate->escontext.error_data->message));
136 +
137 +               return true;

Since LOG means 'Reports information of interest to administrators'
according to the manual[1], datatype error should not be logged as
LOG. I put it back in WARNING.

[1] https://www.postgresql.org/docs/current/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS


--
Regards,

--
Atsushi Torikoshi
NTT DATA CORPORATION
From 6764d7e0f21ca266d7426cb922fd00e5138ec857 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Wed, 22 Mar 2023 22:00:15 +0900
Subject: [PATCH v4] Add new COPY option IGNORE_DATATYPE_ERRORS

Add new COPY option IGNORE_DATATYPE_ERRORS.

Currently entire COPY fails even when there is one unexpected
data regarding data type or range.
IGNORE_DATATYPE_ERRORS ignores these errors and skips them and
COPY data which don't contain problem.

This patch uses the soft error handling infrastructure, which
is introduced by d9f7f5d32f20.

Author: Damir Belyalov, Atsushi Torikoshi

---
 doc/src/sgml/ref/copy.sgml               | 12 ++++++++++++
 src/backend/commands/copy.c              |  8 ++++++++
 src/backend/commands/copyfrom.c          | 20 ++++++++++++++++++++
 src/backend/commands/copyfromparse.c     | 19 +++++++++++++++----
 src/backend/parser/gram.y                |  8 +++++++-
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  3 +++
 src/include/parser/kwlist.h              |  1 +
 src/test/regress/expected/copy2.out      | 15 +++++++++++++++
 src/test/regress/sql/copy2.sql           | 12 ++++++++++++
 10 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 5e591ed2e6..168b1c05d9 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -34,6 +34,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
 
     FORMAT <replaceable class="parameter">format_name</replaceable>
     FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
+    IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ]
     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
     NULL '<replaceable class="parameter">null_string</replaceable>'
     HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
@@ -234,6 +235,17 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>IGNORE_DATATYPE_ERRORS</literal></term>
+    <listitem>
+     <para>
+      Drops rows that contain malformed data while copying. These are rows
+      with columns where the data type's input-function raises an error.
+      Outputs warnings about rows with incorrect data to system logfile.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>DELIMITER</literal></term>
     <listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f14fae3308..02d911abbe 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -419,6 +419,7 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		format_specified = false;
 	bool		freeze_specified = false;
 	bool		header_specified = false;
+	bool		ignore_datatype_errors_specified = false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -458,6 +459,13 @@ ProcessCopyOptions(ParseState *pstate,
 			freeze_specified = true;
 			opts_out->freeze = defGetBoolean(defel);
 		}
+		else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+		{
+			if (ignore_datatype_errors_specified)
+				errorConflictingDefElem(defel, pstate);
+			ignore_datatype_errors_specified = true;
+			opts_out->ignore_datatype_errors = defGetBoolean(defel);
+		}
 		else if (strcmp(defel->defname, "delimiter") == 0)
 		{
 			if (opts_out->delim)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 80bca79cd0..85c47f54b2 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -953,6 +953,7 @@ CopyFrom(CopyFromState cstate)
 	{
 		TupleTableSlot *myslot;
 		bool		skip_tuple;
+		ErrorSaveContext escontext = {T_ErrorSaveContext};
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -985,9 +986,28 @@ CopyFrom(CopyFromState cstate)
 
 		ExecClearTuple(myslot);
 
+		if (cstate->opts.ignore_datatype_errors)
+		{
+			escontext.details_wanted = true;
+			cstate->escontext = escontext;
+		}
+
 		/* Directly store the values/nulls array in the slot */
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
+		{
+			if (cstate->opts.ignore_datatype_errors && cstate->ignored_errors > 0)
+				ereport(WARNING,
+						errmsg("%zd rows were skipped due to data type incompatibility",
+							cstate->ignored_errors));
 			break;
+		}
+
+		/* Soft error occured, skip this tuple */
+		if (cstate->escontext.error_occurred)
+		{
+			ExecClearTuple(myslot);
+			continue;
+		}
 
 		ExecStoreVirtualTuple(myslot);
 
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 3853902a16..b06c44e298 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "nodes/miscnodes.h"
 #include "pgstat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
@@ -956,10 +957,20 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 				values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
 			}
 			else
-				values[m] = InputFunctionCall(&in_functions[m],
-											  string,
-											  typioparams[m],
-											  att->atttypmod);
+				/* If IGNORE_DATATYPE_ERRORS is enabled skip rows with datatype errors */
+				if (!InputFunctionCallSafe(&in_functions[m],
+										   string,
+										   typioparams[m],
+										   att->atttypmod,
+										   (Node *) &cstate->escontext,
+										   &values[m]))
+				{
+					cstate->ignored_errors++;
+
+					ereport(WARNING,
+							errmsg("%s", cstate->escontext.error_data->message));
+					return true;
+				}
 
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index efe88ccf9d..22bf63b42b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -701,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 	HANDLER HAVING HEADER_P HOLD HOUR_P
 
-	IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
+	IDENTITY_P IF_P IGNORE_DATATYPE_ERRORS ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
 	INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
 	INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
 	INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -3378,6 +3378,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("freeze", (Node *) makeBoolean(true), @1);
 				}
+			| IGNORE_DATATYPE_ERRORS
+				{
+					$$ = makeDefElem("ignore_datatype_errors", (Node *)makeBoolean(true), @1);
+				}
 			| DELIMITER opt_as Sconst
 				{
 					$$ = makeDefElem("delimiter", (Node *) makeString($3), @1);
@@ -16827,6 +16831,7 @@ unreserved_keyword:
 			| HOUR_P
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| IMMEDIATE
 			| IMMUTABLE
 			| IMPLICIT_P
@@ -17382,6 +17387,7 @@ bare_label_keyword:
 			| HOLD
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| ILIKE
 			| IMMEDIATE
 			| IMMUTABLE
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 33175868f6..c2e55ac21f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
 								 * -1 if not specified */
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
+	bool		ignore_datatype_errors;  /* ignore rows with datatype errors */
 	bool		csv_mode;		/* Comma Separated Value format? */
 	CopyHeaderChoice header_line;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index ac2c16f8b8..1164c71631 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/miscnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -94,6 +95,8 @@ typedef struct CopyFromStateData
 								 * default value */
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
 	Oid		   *typioparams;	/* array of element types for in_functions */
+	ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
+	int64		ignored_errors;	/* total number of ignored errors */
 	int		   *defmap;			/* array of default att numbers related to
 								 * missing att */
 	ExprState **defexprs;		/* array of default att expressions for all
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 753e9ee174..5ea159e879 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -196,6 +196,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL)
 PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("ignore_datatype_errors", IGNORE_DATATYPE_ERRORS, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e33eee719..a6bf3d66a4 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -666,6 +666,21 @@ SELECT * FROM instead_of_insert_tbl;
 (2 rows)
 
 COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  value "3333333333" is out of range for type integer
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  invalid input syntax for type integer: ""
+WARNING:  4 rows were skipped due to data type incompatibility
+SELECT * FROM check_ign_err;
+ n |  m  | k 
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index d759635068..c934029314 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -464,6 +464,18 @@ test1
 SELECT * FROM instead_of_insert_tbl;
 COMMIT;
 
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+1	{1}	1
+a	{2}	2
+3	{3}	3333333333
+4	{a, 4}	4
+
+5	{5}	5
+\.
+SELECT * FROM check_ign_err;
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;

base-commit: d69c404c4cc5985d8ae5b5ed38bed3400b317f82
-- 
2.25.1

Reply via email to