On Mon, Mar 25, 2024 at 10:42 AM Masahiko Sawada <sawada.m...@gmail.com> wrote: > > The current approach, eliminating the duplicated information in > CONTEXT, seems good to me.
Thanks for looking into it. > One question about the latest (v8) patch: > > + else > + ereport(NOTICE, > + errmsg("data type incompatibility at > line %llu for column %s: null input", > + (unsigned long long) > cstate->cur_lineno, > + cstate->cur_attname)); > + > > How can we reach this path? It seems we don't cover this path by the tests. Tests don't cover that part, but it can be hit with something like [1]. I've added a test for this. Note the use of domain to provide an indirect way of providing null constraint check. Otherwise, COPY FROM fails early in CopyFrom->ExecConstraints if the NOT NULL constraint is directly provided next to the column in the table [2]. Please see the attached v9 patch set. [1] create domain dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); 1 {1} 1 'foo' 2 {2} 2 \N \. [2] CREATE TABLE check_ign_err3 (n int, m int[], k int, l varchar(15) NOT NULL); postgres=# COPY check_ign_err3 FROM STDIN WITH (on_error ignore, log_verbosity verbose); Enter data to be copied followed by a newline. End with a backslash and a period on a line by itself, or an EOF signal. >> 1 {1} 1 'foo' 2 {2} 2 \N>> >> \. ERROR: null value in column "l" of relation "check_ign_err3" violates not-null constraint DETAIL: Failing row contains (2, {2}, 2, null). CONTEXT: COPY check_ign_err3, line 2: "2 {2} 2 \N" -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From bf0c1a166025c6ed1b88233eb8fc20df881d95ca Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Mon, 25 Mar 2024 10:55:00 +0000 Subject: [PATCH v9 1/2] Add LOG_VERBOSITY option to COPY command This commit adds a new option LOG_VERBOSITY to set the verbosity of logged messages by COPY command. A value of 'verbose' can be used to emit more informative messages by the command, while the value of 'default (which is the default) can be used to not log any additional messages. More values such as 'terse', 'row_details' etc. can be added based on the need to the LOG_VERBOSITY option. An upcoming commit for emitting more info on soft errors by COPY FROM command with ON_ERROR 'ignore' uses this. Author: Bharath Rupireddy Reviewed-by: Michael Paquier, Masahiko Sawada Reviewed-by: Atsushi Torikoshi Discussion: https://www.postgresql.org/message-id/CALj2ACXNA0focNeriYRvQQaCGc4CsTuOnFbzF9LqTKNWxuJdhA%40mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 14 +++++++++++ src/backend/commands/copy.c | 38 +++++++++++++++++++++++++++++ src/bin/psql/tab-complete.c | 6 ++++- src/include/commands/copy.h | 10 ++++++++ src/test/regress/expected/copy2.out | 8 ++++++ src/test/regress/sql/copy2.sql | 2 ++ src/tools/pgindent/typedefs.list | 1 + 7 files changed, 78 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 6c83e30ed0..4c307efb54 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -45,6 +45,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } ON_ERROR '<replaceable class="parameter">error_action</replaceable>' ENCODING '<replaceable class="parameter">encoding_name</replaceable>' + LOG_VERBOSITY [ <replaceable class="parameter">mode</replaceable> ] </synopsis> </refsynopsisdiv> @@ -418,6 +419,19 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>LOG_VERBOSITY</literal></term> + <listitem> + <para> + Sets the verbosity of some of the messages logged by a + <command>COPY</command> command. + A <replaceable class="parameter">mode</replaceable> value of + <literal>verbose</literal> can be used to emit more informative messages. + <literal>default</literal> will not log any additional messages. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>WHERE</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 28cf8b040a..67d5c3f7d0 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -422,6 +422,36 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) return COPY_ON_ERROR_STOP; /* keep compiler quiet */ } +/* + * Extract a CopyLogVerbosityChoice value from a DefElem. + */ +static CopyLogVerbosityChoice +defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate) +{ + char *sval; + + /* + * If no parameter value given, assume the default value. + */ + if (def->arg == NULL) + return COPY_LOG_VERBOSITY_DEFAULT; + + /* + * Allow "default", or "verbose" values. + */ + sval = defGetString(def); + if (pg_strcasecmp(sval, "default") == 0) + return COPY_LOG_VERBOSITY_DEFAULT; + if (pg_strcasecmp(sval, "verbose") == 0) + return COPY_LOG_VERBOSITY_VERBOSE; + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY LOG_VERBOSITY \"%s\" not recognized", sval), + parser_errposition(pstate, def->location))); + return COPY_LOG_VERBOSITY_DEFAULT; /* keep compiler quiet */ +} + /* * Process the statement option list for COPY. * @@ -448,6 +478,7 @@ ProcessCopyOptions(ParseState *pstate, bool freeze_specified = false; bool header_specified = false; bool on_error_specified = false; + bool log_verbosity_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -607,6 +638,13 @@ ProcessCopyOptions(ParseState *pstate, on_error_specified = true; opts_out->on_error = defGetCopyOnErrorChoice(defel, pstate, is_from); } + else if (strcmp(defel->defname, "log_verbosity") == 0) + { + if (log_verbosity_specified) + errorConflictingDefElem(defel, pstate); + log_verbosity_specified = true; + opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate); + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 56d723de8a..bfbb3899ad 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2903,7 +2903,7 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", - "ON_ERROR"); + "ON_ERROR", "LOG_VERBOSITY"); /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) @@ -2913,6 +2913,10 @@ psql_completion(const char *text, int start, int end) else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR")) COMPLETE_WITH("stop", "ignore"); + /* Complete COPY <sth> FROM filename WITH (LOG_VERBOSITY */ + else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "LOG_VERBOSITY")) + COMPLETE_WITH("default", "verbose"); + /* Complete COPY <sth> FROM <sth> WITH (<options>) */ else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny)) COMPLETE_WITH("WHERE"); diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b3da3cb0be..99d183fa4d 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -40,6 +40,15 @@ typedef enum CopyOnErrorChoice COPY_ON_ERROR_IGNORE, /* ignore errors */ } CopyOnErrorChoice; +/* + * Represents verbosity of logged messages by COPY command. + */ +typedef enum CopyLogVerbosityChoice +{ + COPY_LOG_VERBOSITY_DEFAULT = 0, /* logs no additional messages, default */ + COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */ +} CopyLogVerbosityChoice; + /* * A struct to hold COPY options, in a parsed form. All of these are related * to formatting, except for 'freeze', which doesn't really belong here, but @@ -73,6 +82,7 @@ typedef struct CopyFormatOptions bool *force_null_flags; /* per-column CSV FN flags */ bool convert_selectively; /* do selective binary conversion? */ CopyOnErrorChoice on_error; /* what to do when error happened */ + CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */ List *convert_select; /* list of column names (can be NIL) */ } CopyFormatOptions; diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index f98c2d1c4e..bb37a2ac70 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (on_error ignore, on_error ignore); ^ +COPY x from stdin (log_verbosity 'default', log_verbosity 'verbose'); +ERROR: conflicting or redundant options +LINE 1: COPY x from stdin (log_verbosity 'default', log_verbosity 'v... + ^ -- incorrect options COPY x to stdin (format BINARY, delimiter ','); ERROR: cannot specify DELIMITER in BINARY mode @@ -108,6 +112,10 @@ COPY x to stdin (format BINARY, on_error unsupported); ERROR: COPY ON_ERROR cannot be used with COPY TO LINE 1: COPY x to stdin (format BINARY, on_error unsupported); ^ +COPY x to stdout (log_verbosity 'unsupported'); +ERROR: COPY LOG_VERBOSITY "unsupported" not recognized +LINE 1: COPY x to stdout (log_verbosity 'unsupported'); + ^ -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index afaaa37e52..4cd3ae577d 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -67,6 +67,7 @@ COPY x from stdin (force_null (a), force_null (b)); COPY x from stdin (convert_selectively (a), convert_selectively (b)); COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); COPY x from stdin (on_error ignore, on_error ignore); +COPY x from stdin (log_verbosity 'default', log_verbosity 'verbose'); -- incorrect options COPY x to stdin (format BINARY, delimiter ','); @@ -80,6 +81,7 @@ COPY x to stdin (format CSV, force_not_null(a)); COPY x to stdout (format TEXT, force_null(a)); COPY x to stdin (format CSV, force_null(a)); COPY x to stdin (format BINARY, on_error unsupported); +COPY x to stdout (log_verbosity 'unsupported'); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e2a0525dd4..2651a1f199 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -480,6 +480,7 @@ CopyFromState CopyFromStateData CopyHeaderChoice CopyInsertMethod +CopyLogVerbosityChoice CopyMultiInsertBuffer CopyMultiInsertInfo CopyOnErrorChoice -- 2.34.1
From 7b6fd050469cf26afa4ede08120b123b620a551a Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Mon, 25 Mar 2024 11:00:59 +0000 Subject: [PATCH v9 2/2] Add detailed info when COPY skips soft errors This commit emits individual info like line number and column name when COPY skips soft errors. Because, the summary containing the total rows skipped isn't enough for the users to know what exactly are the malformed rows in the input data. Author: Bharath Rupireddy Reviewed-by: Michael Paquier, Masahiko Sawada Reviewed-by: Atsushi Torikoshi Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 12 ++++++++-- src/backend/commands/copyfrom.c | 4 +--- src/backend/commands/copyfromparse.c | 35 ++++++++++++++++++++++++++++ src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 33 +++++++++++++++++++++++++- src/test/regress/sql/copy2.sql | 22 ++++++++++++++++- 6 files changed, 100 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 4c307efb54..ecbbf5f94a 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -401,8 +401,12 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>. </para> <para> - A <literal>NOTICE</literal> message containing the ignored row count is emitted at the end - of the <command>COPY FROM</command> if at least one row was discarded. + A <literal>NOTICE</literal> message containing the ignored row count is + emitted at the end of the <command>COPY FROM</command> if at least one + row was discarded. When <literal>LOG_VERBOSITY</literal> option is set to + <literal>verbose</literal>, a <literal>NOTICE</literal> message + containing the line of the input file and the column name whose input + conversion has failed is emitted for each discarded row. </para> </listitem> </varlistentry> @@ -429,6 +433,10 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable <literal>verbose</literal> can be used to emit more informative messages. <literal>default</literal> will not log any additional messages. </para> + <para> + This is currently used in <command>COPY FROM</command> command when + <literal>ON_ERROR</literal> is set to <literal>ignore</literal>. + </para> </listitem> </varlistentry> diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 8908a440e1..fc5bc86ac7 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo /* non-export function prototypes */ -static char *limit_printout_length(const char *str); - static void ClosePipeFromProgram(CopyFromState cstate); /* @@ -189,7 +187,7 @@ CopyFromErrorCallback(void *arg) * * Returns a pstrdup'd copy of the input. */ -static char * +char * limit_printout_length(const char *str) { #define MAX_COPY_DATA_DISPLAY 100 diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 5682d5d054..01ab1de9bd 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (Node *) cstate->escontext, &values[m])) { + Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); + cstate->num_errors++; + + if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) + { + /* + * Since we emit line number and column info in the below + * notice message, we suppress error context information + * other than the relation name. + */ + Assert(!cstate->relname_only); + cstate->relname_only = true; + + if (cstate->cur_attval) + { + char *attval; + + attval = limit_printout_length(cstate->cur_attval); + ereport(NOTICE, + errmsg("data type incompatibility at line %llu for column %s: \"%s\"", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname, + attval)); + pfree(attval); + } + else + ereport(NOTICE, + errmsg("data type incompatibility at line %llu for column %s: null input", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname)); + + /* reset relname_only */ + cstate->relname_only = false; + } + return true; } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 99d183fa4d..9c539772a5 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -107,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, extern bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields); extern void CopyFromErrorCallback(void *arg); +extern char *limit_printout_length(const char *str); extern uint64 CopyFrom(CopyFromState cstate); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index bb37a2ac70..d80d45dce4 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -737,8 +737,31 @@ CREATE TABLE check_ign_err (n int, m int[], k int); COPY check_ign_err FROM STDIN WITH (on_error stop); ERROR: invalid input syntax for type integer: "a" CONTEXT: COPY check_ign_err, line 2, column n: "a" -COPY check_ign_err FROM STDIN WITH (on_error ignore); +-- want context for notices +\set SHOW_CONTEXT always +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: data type incompatibility at line 2 for column n: "a" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 3 for column k: "3333333333" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 4 for column m: "{a, 4}" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 5 for column n: "" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 7 for column m: "a" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 8 for column k: "a" +CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: data type incompatibility at line 2 for column l: null input +CONTEXT: COPY check_ign_err2 +NOTICE: 1 row was skipped due to data type incompatibility +-- reset context choice +\set SHOW_CONTEXT errors SELECT * FROM check_ign_err; n | m | k ---+-----+--- @@ -747,6 +770,12 @@ SELECT * FROM check_ign_err; 8 | {8} | 8 (3 rows) +SELECT * FROM check_ign_err2; + n | m | k | l +---+-----+---+------- + 1 | {1} | 1 | 'foo' +(1 row) + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -775,6 +804,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- -- COPY FROM ... DEFAULT diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 4cd3ae577d..624d531fd6 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -510,7 +510,11 @@ a {2} 2 5 {5} 5 \. -COPY check_ign_err FROM STDIN WITH (on_error ignore); + +-- want context for notices +\set SHOW_CONTEXT always + +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); 1 {1} 1 a {2} 2 3 {3} 3333333333 @@ -521,8 +525,22 @@ a {2} 2 7 {7} a 8 {8} 8 \. + +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); +1 {1} 1 'foo' +2 {2} 2 \N +\. + +-- reset context choice +\set SHOW_CONTEXT errors + SELECT * FROM check_ign_err; +SELECT * FROM check_ign_err2; + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -554,6 +572,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- -- 2.34.1