From 415842714f5b3a3f09f6af4a193398801d8bdf8c Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 13 Mar 2024 14:00:38 +0000
Subject: [PATCH v8 2/2] Add detailed info when COPY skips soft errors

This commit emits individual info like line number and column name
when COPY skips soft errors, because the summary containing the
total number of rows skipped isn't enough for users to know exactly
which rows in the input data are malformed.

Author: Bharath Rupireddy
Reviewed-by: Michael Paquier, Masahiko Sawada
Reviewed-by: Atsushi Torikoshi
Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           | 12 ++++++++--
 src/backend/commands/copyfrom.c      |  4 +---
 src/backend/commands/copyfromparse.c | 35 ++++++++++++++++++++++++++++
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 18 +++++++++++++-
 src/test/regress/sql/copy2.sql       |  9 ++++++-
 6 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index eba9b8f64e..bdd6580721 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -398,8 +398,12 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
      </para>
      <para>
-      A <literal>NOTICE</literal> message containing the ignored row count is emitted at the end
-      of the <command>COPY FROM</command> if at least one row was discarded.
+      A <literal>NOTICE</literal> message containing the ignored row count is
+      emitted at the end of the <command>COPY FROM</command> if at least one
+      row was discarded. When the <literal>LOG_VERBOSITY</literal> option is set to
+      <literal>verbose</literal>, a <literal>NOTICE</literal> message
+      containing the line of the input file and the column name whose input
+      conversion has failed is emitted for each discarded row.
      </para>
     </listitem>
    </varlistentry>
@@ -426,6 +430,10 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       <literal>verbose</literal> can be used to emit more informative messages.
       <literal>default</literal> will not log any additional messages.
      </para>
+     <para>
+      This is currently used in the <command>COPY FROM</command> command when
+      the <literal>ON_ERROR</literal> option is set to <literal>ignore</literal>.
+     </para>
     </listitem>
    </varlistentry>
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 8908a440e1..fc5bc86ac7 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo
 
 
 /* non-export function prototypes */
-static char *limit_printout_length(const char *str);
-
 static void ClosePipeFromProgram(CopyFromState cstate);
 
 /*
@@ -189,7 +187,7 @@ CopyFromErrorCallback(void *arg)
  *
  * Returns a pstrdup'd copy of the input.
  */
-static char *
+char *
 limit_printout_length(const char *str)
 {
 #define MAX_COPY_DATA_DISPLAY 100
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 5682d5d054..01ab1de9bd 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 											(Node *) cstate->escontext,
 											&values[m]))
 			{
+				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
+
 				cstate->num_errors++;
+
+				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+				{
+					/*
+					 * Since we emit line number and column info in the below
+					 * notice message, we suppress error context information
+					 * other than the relation name.
+					 */
+					Assert(!cstate->relname_only);
+					cstate->relname_only = true;
+
+					if (cstate->cur_attval)
+					{
+						char	   *attval;
+
+						attval = limit_printout_length(cstate->cur_attval);
+						ereport(NOTICE,
+								errmsg("data type incompatibility at line %llu for column %s: \"%s\"",
+									   (unsigned long long) cstate->cur_lineno,
+									   cstate->cur_attname,
+									   attval));
+						pfree(attval);
+					}
+					else
+						ereport(NOTICE,
+								errmsg("data type incompatibility at line %llu for column %s: null input",
+									   (unsigned long long) cstate->cur_lineno,
+									   cstate->cur_attname));
+
+					/* reset relname_only */
+					cstate->relname_only = false;
+				}
+
 				return true;
 			}
 
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 99d183fa4d..9c539772a5 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -107,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 extern bool NextCopyFromRawFields(CopyFromState cstate,
 								  char ***fields, int *nfields);
 extern void CopyFromErrorCallback(void *arg);
+extern char *limit_printout_length(const char *str);
 
 extern uint64 CopyFrom(CopyFromState cstate);
 
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index bb37a2ac70..832b8b210f 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -737,8 +737,24 @@ CREATE TABLE check_ign_err (n int, m int[], k int);
 COPY check_ign_err FROM STDIN WITH (on_error stop);
 ERROR:  invalid input syntax for type integer: "a"
 CONTEXT:  COPY check_ign_err, line 2, column n: "a"
-COPY check_ign_err FROM STDIN WITH (on_error ignore);
+-- want context for notices
+\set SHOW_CONTEXT always
+COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
+NOTICE:  data type incompatibility at line 2 for column n: "a"
+CONTEXT:  COPY check_ign_err
+NOTICE:  data type incompatibility at line 3 for column k: "3333333333"
+CONTEXT:  COPY check_ign_err
+NOTICE:  data type incompatibility at line 4 for column m: "{a, 4}"
+CONTEXT:  COPY check_ign_err
+NOTICE:  data type incompatibility at line 5 for column n: ""
+CONTEXT:  COPY check_ign_err
+NOTICE:  data type incompatibility at line 7 for column m: "a"
+CONTEXT:  COPY check_ign_err
+NOTICE:  data type incompatibility at line 8 for column k: "a"
+CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+-- reset context choice
+\set SHOW_CONTEXT errors
 SELECT * FROM check_ign_err;
  n |  m  | k 
 ---+-----+---
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 4cd3ae577d..d290bea265 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -510,7 +510,11 @@ a	{2}	2
 
 5	{5}	5
 \.
-COPY check_ign_err FROM STDIN WITH (on_error ignore);
+
+-- want context for notices
+\set SHOW_CONTEXT always
+
+COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1
 a	{2}	2
 3	{3}	3333333333
@@ -521,6 +525,9 @@ a	{2}	2
 7	{7}	a
 8	{8}	8
 \.
+-- reset context choice
+\set SHOW_CONTEXT errors
+
 SELECT * FROM check_ign_err;
 
 -- test datatype error that can't be handled as soft: should fail
-- 
2.34.1

