On 2024-07-24 19:43, torikoshia wrote:
On 2024-07-23 08:57, Michael Paquier wrote:
On Mon, Jul 22, 2024 at 03:07:46PM -0700, Masahiko Sawada wrote:
I'm slightly concerned that users might not want to see the NOTICE
message for every scan. Unlike COPY FROM, scanning a file via file_fdw
could be frequent.

Agreed.

Yeah, I also have some concerns regarding the noise that this could
produce if called on a foreign table on a regular basis.  The verbose
mode is disabled by default so I don't see why we should not allow it
if the relation owner wants to show it.

Perhaps we should first do a silence mode for log_verbosity to skip
the NOTICE produced at the end of the COPY FROM summarizing the whole?

I like this idea.
If there are no objections, I'm going to make a patch for this.

Attached patches.
0001 adds new option 'silent' to log_verbosity and 0002 adds on_error and log_verbosity options to file_fdw.


I'm going to continue developing the patch(e.g. add doc, measure
performance degradation) when people also think this feature is worth adding.

Here is a quick performance test result.

I loaded 1,000,000 rows using pgbench_accounts to a file and counted the number of rows using file_fdw on different conditions and compared the execution times on my laptop.

The changed conditions are below:
- source code: HEAD/patch applied
- data: no error data/all row occur data conversion error at the 1st column
- file_fdw options: on_error=stop/on_error=ignore

There seems no significant difference in performance between HEAD and the patch applied with on_error option specified as either ignore/stop when data has no error. OTOH when all rows occur data conversion error, it is significantly faster than other cases:

# HAED(e950fe58bd0)
## data:no error

=# create foreign table t1 (a int, b int, c int, t text) server f_fdw options (filename 'pgb_ac', format 'csv');
=# select count(*) from t1;

1567.569 ms
1675.112 ms
1555.782 ms
1547.676 ms
1660.221 ms

# patch applied
## data:no error, on_error:stop

=# create foreign table t1 (a int, b int, c int, t text) server f_fdw options (filename 'pgb_ac', format 'csv', on_error 'stop');
=# select count(*) from t1;

1580.656 ms
1623.784 ms
1596.947 ms
1652.307 ms
1613.607 ms

## data:no error, on_error:ignore

=# create foreign table t1 (a int, b int, c int, t text) server f_fdw options (filename 'pgb_ac', format 'csv', on_error 'ignore');
=# select count(*) from t1;

1575.718 ms
1597.464 ms
1596.540 ms
1665.818 ms
1595.453 ms

#### data:all rows contain error, on_error:ignore

=# create foreign table t1 (a int, b int, c int, t text) server f_fdw options (filename 'pgb_ac', format 'csv', on_error 'ignore');
=# select count(*) from t1;

914.537 ms
907.506 ms
912.768 ms
913.769 ms
914.327 ms


--
Regards,

--
Atsushi Torikoshi
NTT DATA Group Corporation
From c32e24d09679814469a31beedb99e37c7ad0e21a Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Thu, 8 Aug 2024 15:49:51 +0900
Subject: [PATCH v3 1/2] Add log_verbosity to 'silent'

Currently when on_error is set to ignore, COPY logs a NOTICE message
containing the ignored row count.
When using on_error option to file_fdw, it'd be better to omit this
message in some cases, such as frequent access to a malformed file
via the same foreign table.
As a preliminary step to add on_error option to file_fdw, this patch
adds new value 'silent' to log_verbosity and enables to silence
the message.

---
 doc/src/sgml/ref/copy.sgml          | 8 ++++++--
 src/backend/commands/copy.c         | 4 +++-
 src/backend/commands/copyfrom.c     | 3 ++-
 src/include/commands/copy.h         | 5 +++--
 src/test/regress/expected/copy2.out | 4 +++-
 src/test/regress/sql/copy2.sql      | 4 ++++
 6 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a04..3dbb1f7453 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -407,6 +407,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       <literal>verbose</literal>, a <literal>NOTICE</literal> message
       containing the line of the input file and the column name whose input
       conversion has failed is emitted for each discarded row.
+      When it is set to <literal>silent</literal>, no message is emitted
+      regarding ignored rows.
      </para>
     </listitem>
    </varlistentry>
@@ -428,8 +430,10 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     <listitem>
      <para>
       Specify the amount of messages emitted by a <command>COPY</command>
-      command: <literal>default</literal> or <literal>verbose</literal>. If
-      <literal>verbose</literal> is specified, additional messages are emitted
+      command: <literal>silent</literal>, <literal>default</literal>, or
+      <literal>verbose</literal>.
+      <literal>silent</literal> excludes verbose messages.
+      If <literal>verbose</literal> is specified, additional messages are emitted
       during processing.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index df7a4a21c9..439bdbd83e 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -424,9 +424,11 @@ defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate)
 	char	   *sval;
 
 	/*
-	 * Allow "default", or "verbose" values.
+	 * Allow "silent", "default", or "verbose" values.
 	 */
 	sval = defGetString(def);
+	if (pg_strcasecmp(sval, "silent") == 0)
+		return COPY_LOG_VERBOSITY_SILENT;
 	if (pg_strcasecmp(sval, "default") == 0)
 		return COPY_LOG_VERBOSITY_DEFAULT;
 	if (pg_strcasecmp(sval, "verbose") == 0)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index c42a5621d5..37c5776ed0 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1318,7 +1318,8 @@ CopyFrom(CopyFromState cstate)
 	error_context_stack = errcallback.previous;
 
 	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
-		cstate->num_errors > 0)
+		cstate->num_errors > 0 &&
+		cstate->opts.log_verbosity != COPY_LOG_VERBOSITY_SILENT)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
 							  "%llu rows were skipped due to data type incompatibility",
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 141fd48dc1..0a7ef34376 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -45,8 +45,9 @@ typedef enum CopyOnErrorChoice
  */
 typedef enum CopyLogVerbosityChoice
 {
-	COPY_LOG_VERBOSITY_DEFAULT = 0, /* logs no additional messages, default */
-	COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */
+	COPY_LOG_VERBOSITY_SILENT = -1,	/* logs none */
+	COPY_LOG_VERBOSITY_DEFAULT = 0,	/* logs no additional messages, default */
+	COPY_LOG_VERBOSITY_VERBOSE,	/* logs additional messages */
 } CopyLogVerbosityChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 931542f268..be3ddda03a 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -760,6 +760,7 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 NOTICE:  skipping row due to data type incompatibility at line 2 for column l: null input
 CONTEXT:  COPY check_ign_err2
 NOTICE:  1 row was skipped due to data type incompatibility
+COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
 -- reset context choice
 \set SHOW_CONTEXT errors
 SELECT * FROM check_ign_err;
@@ -774,7 +775,8 @@ SELECT * FROM check_ign_err2;
  n |  m  | k |   l   
 ---+-----+---+-------
  1 | {1} | 1 | 'foo'
-(1 row)
+ 3 | {3} | 3 | 'bar'
+(2 rows)
 
 -- test datatype error that can't be handled as soft: should fail
 CREATE TABLE hard_err(foo widget);
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 8b14962194..fa6aa17344 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -533,6 +533,10 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
 1	{1}	1	'foo'
 2	{2}	2	\N
 \.
+COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent);
+3	{3}	3	'bar'
+4	{4}	4	\N
+\.
 
 -- reset context choice
 \set SHOW_CONTEXT errors
-- 
2.39.2

From e950fe58bd043f8e378ff6d75773bb52855504f4 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Thu, 8 Aug 2024 15:51:35 +0900
Subject: [PATCH v3 2/2] Add on_error and log_verbosity options to file_fdw

9e2d870, b725b7e and f5a2278 introduced on_error and log_verbosity to COPY.
Since these options seems useful to file_fdw when accessing dirty data,
this patch adds them to file_fdw.
---
 contrib/file_fdw/expected/file_fdw.out | 18 ++++++++
 contrib/file_fdw/file_fdw.c            | 61 +++++++++++++++++++++++---
 contrib/file_fdw/sql/file_fdw.sql      |  6 +++
 doc/src/sgml/file-fdw.sgml             | 23 ++++++++++
 4 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 86c148a86b..2ea0deeb10 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -206,6 +206,24 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 SELECT * FROM agg_bad;               -- ERROR
 ERROR:  invalid input syntax for type real: "aaa"
 CONTEXT:  COPY agg_bad, line 3, column b: "aaa"
+-- on_error and log_verbosity tests
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
+SELECT * FROM agg_bad;
+NOTICE:  1 row was skipped due to data type incompatibility
+  a  |   b    
+-----+--------
+ 100 | 99.097
+  42 | 324.78
+(2 rows)
+
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
+SELECT * FROM agg_bad;
+  a  |   b    
+-----+--------
+ 100 | 99.097
+  42 | 324.78
+(2 rows)
+
 -- misc query tests
 \t on
 SELECT explain_filter('EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv');
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 249d82d3a0..11fc75b4d7 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -22,8 +22,10 @@
 #include "catalog/pg_authid.h"
 #include "catalog/pg_foreign_table.h"
 #include "commands/copy.h"
+#include "commands/copyfrom_internal.h"
 #include "commands/defrem.h"
 #include "commands/explain.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "foreign/fdwapi.h"
 #include "foreign/foreign.h"
@@ -34,6 +36,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "utils/acl.h"
+#include "utils/backend_progress.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/sampling.h"
@@ -74,6 +77,8 @@ static const struct FileFdwOption valid_options[] = {
 	{"null", ForeignTableRelationId},
 	{"default", ForeignTableRelationId},
 	{"encoding", ForeignTableRelationId},
+	{"on_error", ForeignTableRelationId},
+	{"log_verbosity", ForeignTableRelationId},
 	{"force_not_null", AttributeRelationId},
 	{"force_null", AttributeRelationId},
 
@@ -724,12 +729,13 @@ fileIterateForeignScan(ForeignScanState *node)
 	ExprContext *econtext;
 	MemoryContext oldcontext;
 	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
-	bool		found;
+	CopyFromState cstate = festate->cstate;
+	int64	skipped = 0;
 	ErrorContextCallback errcallback;
 
 	/* Set up callback to identify error line number. */
 	errcallback.callback = CopyFromErrorCallback;
-	errcallback.arg = (void *) festate->cstate;
+	errcallback.arg = (void *) cstate;
 	errcallback.previous = error_context_stack;
 	error_context_stack = &errcallback;
 
@@ -750,10 +756,40 @@ fileIterateForeignScan(ForeignScanState *node)
 	 * switch in case we are doing that.
 	 */
 	oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-	found = NextCopyFrom(festate->cstate, econtext,
-						 slot->tts_values, slot->tts_isnull);
-	if (found)
+
+	for(;;)
+	{
+		if (!NextCopyFrom(cstate, econtext,
+					 slot->tts_values, slot->tts_isnull))
+			break;
+
+		if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+			cstate->escontext->error_occurred)
+		{
+			/*
+			 * Soft error occurred, skip this tuple and deal with error
+			 * information according to ON_ERROR.
+			 */
+			if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+
+				/*
+				 * Just make ErrorSaveContext ready for the next NextCopyFrom.
+				 * Since we don't set details_wanted and error_data is not to
+				 * be filled, just resetting error_occurred is enough.
+				 */
+				cstate->escontext->error_occurred = false;
+
+			/* Report that this tuple was skipped by the ON_ERROR clause */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+										 ++skipped);
+
+			/* Repeat NextCopyFrom() until no soft error occurs */
+			continue;
+		}
+
 		ExecStoreVirtualTuple(slot);
+		break;
+	}
 
 	/* Switch back to original memory context */
 	MemoryContextSwitchTo(oldcontext);
@@ -795,8 +831,19 @@ fileEndForeignScan(ForeignScanState *node)
 	FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
 
 	/* if festate is NULL, we are in EXPLAIN; nothing to do */
-	if (festate)
-		EndCopyFrom(festate->cstate);
+	if (!festate)
+		return;
+
+	if (festate->cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+		festate->cstate->num_errors > 0 &&
+		festate->cstate->opts.log_verbosity != COPY_LOG_VERBOSITY_SILENT)
+		ereport(NOTICE,
+				errmsg_plural("%llu row was skipped due to data type incompatibility",
+							  "%llu rows were skipped due to data type incompatibility",
+							  (unsigned long long) festate->cstate->num_errors,
+							  (unsigned long long) festate->cstate->num_errors));
+
+	EndCopyFrom(festate->cstate);
 }
 
 /*
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index f0548e14e1..a417067e54 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -150,6 +150,12 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 -- error context report tests
 SELECT * FROM agg_bad;               -- ERROR
 
+-- on_error and log_verbosity tests
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
+SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
+SELECT * FROM agg_bad;
+
 -- misc query tests
 \t on
 SELECT explain_filter('EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv');
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index f2f2af9a59..bb3579b077 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -126,6 +126,29 @@
    </listitem>
   </varlistentry>
 
+  <varlistentry>
+   <term><literal>on_error</literal></term>
+
+   <listitem>
+    <para>
+     Specifies how to behave when encountering an error converting a column's
+     input value into its data type,
+     the same as <command>COPY</command>'s <literal>ON_ERROR</literal> option.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>log_verbosity</literal></term>
+
+   <listitem>
+    <para>
+     Specifies the amount of messages emitted by <literal>file_fdw</literal>,
+     the same as <command>COPY</command>'s <literal>LOG_VERBOSITY</literal> option.
+    </para>
+   </listitem>
+  </varlistentry>
+
  </variablelist>
 
  <para>
-- 
2.39.2

Reply via email to