On Tue, Mar 26, 2024 at 9:56 AM Masahiko Sawada <sawada.m...@gmail.com> wrote:
>
> > > errmsg("data type incompatibility at line %llu for column %s: \"%s\"",
>
> > > I guess it would be better to make the log message clearer to convey
> > > what we did for the malformed row. For example, how about something
> > > like "skipping row due to data type incompatibility at line %llu for
> > > column %s: \"s\""?
> >
> > The summary message which gets printed at the end says that "NOTICE:
> > 6 rows were skipped due to data type incompatibility". Isn't this
> > enough? If someone is using ON_ERROR 'ignore', it's quite natural that
> > such rows get skipped softly and the summary message can help them,
> > no?
>
> I think that in the main log message we should mention what happened
> (or is happening) or what we did (or are doing). If the message "data
> type incompatibility ..." was in the DETAIL message with the main
> message saying something like "skipping row at line %llu for column
> %s: ...", it would make sense to me. But the current message seems not
> to be clear to me and consistent with other NOTICE messages. Also, the
> last summary line would not be written if the user cancelled, and
> someone other than person who used ON_ERROR 'ignore' might check the
> server logs later.

Agree. I changed the NOTICE message to what you've suggested. Thanks.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From da8c02dace865ea9b02f19968056f25069d8aa91 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Tue, 26 Mar 2024 05:51:55 +0000
Subject: [PATCH v11] Add detailed info when COPY skips soft errors

This commit emits individual info like line number and column name
when COPY skips soft errors. Because, the summary containing the
total rows skipped isn't enough for the users to know what exactly
are the malformed rows in the input data.

This commit also adds a new option LOG_VERBOSITY to control the
verbosity of logged messages when COPY command skips soft errors.
This option if required can also be extended to control other COPY
related log messages. A value of 'verbose' can be used to emit
more informative messages by the command, while the value of
'default (which is the default) can be used to not log any
additional messages. More values such as 'terse', 'row_details'
etc. can be added based on the need  to the LOG_VERBOSITY option.
To see the individual info added by this commit when COPY skips
soft errors, one needs to set LOG_VERBOSITY to 'verbose'.

Author: Bharath Rupireddy
Reviewed-by: Michael Paquier, Masahiko Sawada
Reviewed-by: Atsushi Torikoshi
Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           | 26 ++++++++++++++++--
 src/backend/commands/copy.c          | 38 ++++++++++++++++++++++++++
 src/backend/commands/copyfrom.c      | 10 +++----
 src/backend/commands/copyfromparse.c | 35 ++++++++++++++++++++++++
 src/bin/psql/tab-complete.c          |  6 +++-
 src/include/commands/copy.h          | 11 ++++++++
 src/test/regress/expected/copy2.out  | 41 +++++++++++++++++++++++++++-
 src/test/regress/sql/copy2.sql       | 24 +++++++++++++++-
 src/tools/pgindent/typedefs.list     |  1 +
 9 files changed, 181 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 6c83e30ed0..ecbbf5f94a 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -45,6 +45,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
     ON_ERROR '<replaceable class="parameter">error_action</replaceable>'
     ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
+    LOG_VERBOSITY [ <replaceable class="parameter">mode</replaceable> ]
 </synopsis>
  </refsynopsisdiv>
 
@@ -400,8 +401,12 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
-      A <literal>NOTICE</literal> message containing the ignored row count is emitted at the end
-      of the <command>COPY FROM</command> if at least one row was discarded.
+      A <literal>NOTICE</literal> message containing the ignored row count is
+      emitted at the end of the <command>COPY FROM</command> if at least one
+      row was discarded. When <literal>LOG_VERBOSITY</literal> option is set to
+      <literal>verbose</literal>, a <literal>NOTICE</literal> message
+      containing the line of the input file and the column name whose input
+      conversion has failed is emitted for each discarded row.
      </para>
     </listitem>
    </varlistentry>
@@ -418,6 +423,23 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>LOG_VERBOSITY</literal></term>
+    <listitem>
+     <para>
+      Sets the verbosity of some of the messages logged by a
+      <command>COPY</command> command.
+      A <replaceable class="parameter">mode</replaceable> value of
+      <literal>verbose</literal> can be used to emit more informative messages.
+      <literal>default</literal> will not log any additional messages.
+     </para>
+     <para>
+      This is currently used in <command>COPY FROM</command> command when
+      <literal>ON_ERROR</literal> is set to <literal>ignore</literal>.
+      </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>WHERE</literal></term>
     <listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 28cf8b040a..67d5c3f7d0 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -422,6 +422,36 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 	return COPY_ON_ERROR_STOP;	/* keep compiler quiet */
 }
 
+/*
+ * Extract a CopyLogVerbosityChoice value from a DefElem.
+ */
+static CopyLogVerbosityChoice
+defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate)
+{
+	char	   *sval;
+
+	/*
+	 * If no parameter value given, assume the default value.
+	 */
+	if (def->arg == NULL)
+		return COPY_LOG_VERBOSITY_DEFAULT;
+
+	/*
+	 * Allow "default", or "verbose" values.
+	 */
+	sval = defGetString(def);
+	if (pg_strcasecmp(sval, "default") == 0)
+		return COPY_LOG_VERBOSITY_DEFAULT;
+	if (pg_strcasecmp(sval, "verbose") == 0)
+		return COPY_LOG_VERBOSITY_VERBOSE;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			 errmsg("COPY LOG_VERBOSITY \"%s\" not recognized", sval),
+			 parser_errposition(pstate, def->location)));
+	return COPY_LOG_VERBOSITY_DEFAULT;	/* keep compiler quiet */
+}
+
 /*
  * Process the statement option list for COPY.
  *
@@ -448,6 +478,7 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		freeze_specified = false;
 	bool		header_specified = false;
 	bool		on_error_specified = false;
+	bool		log_verbosity_specified = false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -607,6 +638,13 @@ ProcessCopyOptions(ParseState *pstate,
 			on_error_specified = true;
 			opts_out->on_error = defGetCopyOnErrorChoice(defel, pstate, is_from);
 		}
+		else if (strcmp(defel->defname, "log_verbosity") == 0)
+		{
+			if (log_verbosity_specified)
+				errorConflictingDefElem(defel, pstate);
+			log_verbosity_specified = true;
+			opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate);
+		}
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 8908a440e1..06bc14636d 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo
 
 
 /* non-export function prototypes */
-static char *limit_printout_length(const char *str);
-
 static void ClosePipeFromProgram(CopyFromState cstate);
 
 /*
@@ -141,7 +139,7 @@ CopyFromErrorCallback(void *arg)
 			/* error is relevant to a particular column */
 			char	   *attval;
 
-			attval = limit_printout_length(cstate->cur_attval);
+			attval = CopyLimitPrintoutLength(cstate->cur_attval);
 			errcontext("COPY %s, line %llu, column %s: \"%s\"",
 					   cstate->cur_relname,
 					   (unsigned long long) cstate->cur_lineno,
@@ -168,7 +166,7 @@ CopyFromErrorCallback(void *arg)
 			{
 				char	   *lineval;
 
-				lineval = limit_printout_length(cstate->line_buf.data);
+				lineval = CopyLimitPrintoutLength(cstate->line_buf.data);
 				errcontext("COPY %s, line %llu: \"%s\"",
 						   cstate->cur_relname,
 						   (unsigned long long) cstate->cur_lineno, lineval);
@@ -189,8 +187,8 @@ CopyFromErrorCallback(void *arg)
  *
  * Returns a pstrdup'd copy of the input.
  */
-static char *
-limit_printout_length(const char *str)
+char *
+CopyLimitPrintoutLength(const char *str)
 {
 #define MAX_COPY_DATA_DISPLAY 100
 
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 5682d5d054..7ddd27f5c6 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 											(Node *) cstate->escontext,
 											&values[m]))
 			{
+				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
+
 				cstate->num_errors++;
+
+				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+				{
+					/*
+					 * Since we emit line number and column info in the below
+					 * notice message, we suppress error context information
+					 * other than the relation name.
+					 */
+					Assert(!cstate->relname_only);
+					cstate->relname_only = true;
+
+					if (cstate->cur_attval)
+					{
+						char	   *attval;
+
+						attval = CopyLimitPrintoutLength(cstate->cur_attval);
+						ereport(NOTICE,
+								errmsg("skipping row due to data type incompatibility at line %llu for column %s: \"%s\"",
+									   (unsigned long long) cstate->cur_lineno,
+									   cstate->cur_attname,
+									   attval));
+						pfree(attval);
+					}
+					else
+						ereport(NOTICE,
+								errmsg("skipping row due to data type incompatibility at line %llu for column %s: null input",
+									   (unsigned long long) cstate->cur_lineno,
+									   cstate->cur_attname));
+
+					/* reset relname_only */
+					cstate->relname_only = false;
+				}
+
 				return true;
 			}
 
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 56d723de8a..bfbb3899ad 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2903,7 +2903,7 @@ psql_completion(const char *text, int start, int end)
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
 					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
-					  "ON_ERROR");
+					  "ON_ERROR", "LOG_VERBOSITY");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
@@ -2913,6 +2913,10 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR"))
 		COMPLETE_WITH("stop", "ignore");
 
+	/* Complete COPY <sth> FROM filename WITH (LOG_VERBOSITY */
+	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "LOG_VERBOSITY"))
+		COMPLETE_WITH("default", "verbose");
+
 	/* Complete COPY <sth> FROM <sth> WITH (<options>) */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny))
 		COMPLETE_WITH("WHERE");
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b3da3cb0be..141fd48dc1 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -40,6 +40,15 @@ typedef enum CopyOnErrorChoice
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
 } CopyOnErrorChoice;
 
+/*
+ * Represents verbosity of logged messages by COPY command.
+ */
+typedef enum CopyLogVerbosityChoice
+{
+	COPY_LOG_VERBOSITY_DEFAULT = 0, /* logs no additional messages, default */
+	COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */
+} CopyLogVerbosityChoice;
+
 /*
  * A struct to hold COPY options, in a parsed form. All of these are related
  * to formatting, except for 'freeze', which doesn't really belong here, but
@@ -73,6 +82,7 @@ typedef struct CopyFormatOptions
 	bool	   *force_null_flags;	/* per-column CSV FN flags */
 	bool		convert_selectively;	/* do selective binary conversion? */
 	CopyOnErrorChoice on_error; /* what to do when error happened */
+	CopyLogVerbosityChoice log_verbosity;	/* verbosity of logged messages */
 	List	   *convert_select; /* list of column names (can be NIL) */
 } CopyFormatOptions;
 
@@ -97,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 extern bool NextCopyFromRawFields(CopyFromState cstate,
 								  char ***fields, int *nfields);
 extern void CopyFromErrorCallback(void *arg);
+extern char *CopyLimitPrintoutLength(const char *str);
 
 extern uint64 CopyFrom(CopyFromState cstate);
 
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index f98c2d1c4e..0735551539 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (log_verbosity 'default', log_verbosity 'verbose');
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (log_verbosity 'default', log_verbosity 'v...
+                                                    ^
 -- incorrect options
 COPY x to stdin (format BINARY, delimiter ',');
 ERROR:  cannot specify DELIMITER in BINARY mode
@@ -108,6 +112,10 @@ COPY x to stdin (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdin (format BINARY, on_error unsupported);
                                         ^
+COPY x to stdout (log_verbosity 'unsupported');
+ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
+LINE 1: COPY x to stdout (log_verbosity 'unsupported');
+                          ^
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
 ERROR:  column "d" specified more than once
@@ -729,8 +737,31 @@ CREATE TABLE check_ign_err (n int, m int[], k int);
 COPY check_ign_err FROM STDIN WITH (on_error stop);
 ERROR:  invalid input syntax for type integer: "a"
 CONTEXT:  COPY check_ign_err, line 2, column n: "a"
-COPY check_ign_err FROM STDIN WITH (on_error ignore);
+-- want context for notices
+\set SHOW_CONTEXT always
+COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
+NOTICE:  skipping row due to data type incompatibility at line 2 for column n: "a"
+CONTEXT:  COPY check_ign_err
+NOTICE:  skipping row due to data type incompatibility at line 3 for column k: "3333333333"
+CONTEXT:  COPY check_ign_err
+NOTICE:  skipping row due to data type incompatibility at line 4 for column m: "{a, 4}"
+CONTEXT:  COPY check_ign_err
+NOTICE:  skipping row due to data type incompatibility at line 5 for column n: ""
+CONTEXT:  COPY check_ign_err
+NOTICE:  skipping row due to data type incompatibility at line 7 for column m: "a"
+CONTEXT:  COPY check_ign_err
+NOTICE:  skipping row due to data type incompatibility at line 8 for column k: "a"
+CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+-- tests for on_error option with log_verbosity and null constraint via domain
+CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
+CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
+COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
+NOTICE:  skipping row due to data type incompatibility at line 2 for column l: null input
+CONTEXT:  COPY check_ign_err2
+NOTICE:  1 row was skipped due to data type incompatibility
+-- reset context choice
+\set SHOW_CONTEXT errors
 SELECT * FROM check_ign_err;
  n |  m  | k 
 ---+-----+---
@@ -739,6 +770,12 @@ SELECT * FROM check_ign_err;
  8 | {8} | 8
 (3 rows)
 
+SELECT * FROM check_ign_err2;
+ n |  m  | k |   l   
+---+-----+---+-------
+ 1 | {1} | 1 | 'foo'
+(1 row)
+
 -- test datatype error that can't be handled as soft: should fail
 CREATE TABLE hard_err(foo widget);
 COPY hard_err FROM STDIN WITH (on_error ignore);
@@ -767,6 +804,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE check_ign_err2;
+DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
 --
 -- COPY FROM ... DEFAULT
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index afaaa37e52..624d531fd6 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,6 +67,7 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (log_verbosity 'default', log_verbosity 'verbose');
 
 -- incorrect options
 COPY x to stdin (format BINARY, delimiter ',');
@@ -80,6 +81,7 @@ COPY x to stdin (format CSV, force_not_null(a));
 COPY x to stdout (format TEXT, force_null(a));
 COPY x to stdin (format CSV, force_null(a));
 COPY x to stdin (format BINARY, on_error unsupported);
+COPY x to stdout (log_verbosity 'unsupported');
 
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
@@ -508,7 +510,11 @@ a	{2}	2
 
 5	{5}	5
 \.
-COPY check_ign_err FROM STDIN WITH (on_error ignore);
+
+-- want context for notices
+\set SHOW_CONTEXT always
+
+COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1
 a	{2}	2
 3	{3}	3333333333
@@ -519,8 +525,22 @@ a	{2}	2
 7	{7}	a
 8	{8}	8
 \.
+
+-- tests for on_error option with log_verbosity and null constraint via domain
+CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
+CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
+COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
+1	{1}	1	'foo'
+2	{2}	2	\N
+\.
+
+-- reset context choice
+\set SHOW_CONTEXT errors
+
 SELECT * FROM check_ign_err;
 
+SELECT * FROM check_ign_err2;
+
 -- test datatype error that can't be handled as soft: should fail
 CREATE TABLE hard_err(foo widget);
 COPY hard_err FROM STDIN WITH (on_error ignore);
@@ -552,6 +572,8 @@ DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
 DROP TABLE check_ign_err;
+DROP TABLE check_ign_err2;
+DROP DOMAIN dcheck_ign_err2;
 DROP TABLE hard_err;
 
 --
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4679660837..20724ebf33 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -480,6 +480,7 @@ CopyFromState
 CopyFromStateData
 CopyHeaderChoice
 CopyInsertMethod
+CopyLogVerbosityChoice
 CopyMultiInsertBuffer
 CopyMultiInsertInfo
 CopyOnErrorChoice
-- 
2.34.1

Reply via email to