From ec7e2c671def498f894afb5d4e6467b7dff46e2b Mon Sep 17 00:00:00 2001
From: Shinya Kato <shinya11.kato@gmail.com>
Date: Thu, 26 Jun 2025 19:09:31 +0900
Subject: [PATCH v2] Add support for multi-line header skipping in COPY The
 HEADER option for COPY now accepts a non-negative integer value, allowing
 users to skip an arbitrary number of header lines when importing data with
 COPY FROM. For COPY TO, only 0 or 1 is allowed.

---
 doc/src/sgml/ref/copy.sgml           |  5 ++-
 src/backend/commands/copy.c          | 67 ++++++++++++++++++----------
 src/backend/commands/copyfromparse.c | 15 ++++---
 src/backend/commands/copyto.c        |  2 +-
 src/include/commands/copy.h          | 15 +++----
 src/test/regress/expected/copy.out   | 25 ++++++++++-
 src/test/regress/expected/copy2.out  |  6 +++
 src/test/regress/sql/copy.sql        | 30 +++++++++++++
 src/test/regress/sql/copy2.sql       |  3 ++
 src/tools/pgindent/typedefs.list     |  2 +-
 10 files changed, 130 insertions(+), 40 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8433344e5b6..1869d74cdf1 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
     NULL '<replaceable class="parameter">null_string</replaceable>'
     DEFAULT '<replaceable class="parameter">default_string</replaceable>'
-    HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
+    HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
     QUOTE '<replaceable class="parameter">quote_character</replaceable>'
     ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -307,6 +307,9 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       column in the file.  On output, the first line contains the column
       names from the table.  On input, the first line is discarded when this
       option is set to <literal>true</literal> (or equivalent Boolean value).
+      If this option is set to a non-negative integer value, specified number of
+      lines are discarded.  An integer value greater than 1 is not allowed for
+      <command>COPY TO</command> commands.
       If this option is set to <literal>MATCH</literal>, the number and names
       of the columns in the header line must match the actual column names of
       the table, in order;  otherwise an error is raised.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a7..d0150ffee4c 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,33 +322,46 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 }
 
 /*
- * Extract a CopyHeaderChoice value from a DefElem.  This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract a CopyHeaderOption value from a DefElem.
+ *
+ * Parses the HEADER option for COPY. Accepts boolean values, integer values
+ * (number of lines to skip, via skip_lines), or the special value "match".
+ *
+ * For COPY TO, boolean values and skip_lines 0 or 1 are allowed.
+ * For COPY FROM, boolean values, skip_lines >= 0, and "match" are allowed.
  */
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static CopyHeaderOption
+defGetCopyHeaderOption(DefElem *def, bool is_from)
 {
+	CopyHeaderOption option = {false, 1};
+
 	/*
 	 * If no parameter value given, assume "true" is meant.
 	 */
 	if (def->arg == NULL)
-		return COPY_HEADER_TRUE;
+		return option;
 
 	/*
-	 * Allow 0, 1, "true", "false", "on", "off", or "match".
+	 * Allow integer value, "true", "false", "on", "off", or "match".
 	 */
 	switch (nodeTag(def->arg))
 	{
 		case T_Integer:
-			switch (intVal(def->arg))
 			{
-				case 0:
-					return COPY_HEADER_FALSE;
-				case 1:
-					return COPY_HEADER_TRUE;
-				default:
-					/* otherwise, error out below */
-					break;
+				int skip_lines = intVal(def->arg);
+				if (skip_lines < 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("%s requires a Boolean value, non-negative"
+								    "integer value, or \"match\"",
+									def->defname)));
+				if (!is_from && skip_lines > 1)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot use multi-line header in COPY TO")));
+
+				option.skip_lines = skip_lines;
+				return option;
 			}
 			break;
 		default:
@@ -360,13 +373,19 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 				 * grammar's opt_boolean_or_string production.
 				 */
 				if (pg_strcasecmp(sval, "true") == 0)
-					return COPY_HEADER_TRUE;
+					return option;
 				if (pg_strcasecmp(sval, "false") == 0)
-					return COPY_HEADER_FALSE;
+				{
+					option.skip_lines = 0;
+					return option;
+				}
 				if (pg_strcasecmp(sval, "on") == 0)
-					return COPY_HEADER_TRUE;
+					return option;
 				if (pg_strcasecmp(sval, "off") == 0)
-					return COPY_HEADER_FALSE;
+				{
+					option.skip_lines = 0;
+					return option;
+				}
 				if (pg_strcasecmp(sval, "match") == 0)
 				{
 					if (!is_from)
@@ -374,16 +393,18 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 								 errmsg("cannot use \"%s\" with HEADER in COPY TO",
 										sval)));
-					return COPY_HEADER_MATCH;
+
+					option.match = true;
+					return option;
 				}
 			}
 			break;
 	}
 	ereport(ERROR,
 			(errcode(ERRCODE_SYNTAX_ERROR),
-			 errmsg("%s requires a Boolean value or \"match\"",
+			 errmsg("%s requires a Boolean value, integer value, or \"match\"",
 					def->defname)));
-	return COPY_HEADER_FALSE;	/* keep compiler quiet */
+	return option;	/* keep compiler quiet */
 }
 
 /*
@@ -566,7 +587,7 @@ ProcessCopyOptions(ParseState *pstate,
 			if (header_specified)
 				errorConflictingDefElem(defel, pstate);
 			header_specified = true;
-			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+			opts_out->header = defGetCopyHeaderOption(defel, is_from);
 		}
 		else if (strcmp(defel->defname, "quote") == 0)
 		{
@@ -769,7 +790,7 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
 	/* Check header */
-	if (opts_out->binary && opts_out->header_line)
+	if (opts_out->binary && opts_out->header.skip_lines > 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f5fc346e201..912a2030cfa 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,23 +771,28 @@ static pg_attribute_always_inline bool
 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 {
 	int			fldct;
-	bool		done;
+	bool 		done = false;
 
 	/* only available for text or csv input */
 	Assert(!cstate->opts.binary);
 
 	/* on input check that the header line is correct if needed */
-	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+	if (cstate->cur_lineno == 0 && cstate->opts.header.skip_lines)
 	{
 		ListCell   *cur;
 		TupleDesc	tupDesc;
 
 		tupDesc = RelationGetDescr(cstate->rel);
 
-		cstate->cur_lineno++;
-		done = CopyReadLine(cstate, is_csv);
+		for (int i = 0; i < cstate->opts.header.skip_lines; i++)
+		{
+			cstate->cur_lineno++;
+			done = CopyReadLine(cstate, is_csv);
+			if (done)
+				break;
+		}
 
-		if (cstate->opts.header_line == COPY_HEADER_MATCH)
+		if (cstate->opts.header.match)
 		{
 			int			fldnum;
 
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ea6f18f2c80..d897d003a8d 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 														  cstate->file_encoding);
 
 	/* if a header has been requested send the line */
-	if (cstate->opts.header_line)
+	if (cstate->opts.header.skip_lines == 1)
 	{
 		ListCell   *cur;
 		bool		hdr_delim = false;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 06dfdfef721..65b5ecc6af8 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -20,15 +20,14 @@
 #include "tcop/dest.h"
 
 /*
- * Represents whether a header line should be present, and whether it must
- * match the actual names (which implies "true").
+ * Represents how many lines to skip, and whether it must match the actual names
+ * (which implies "true").
  */
-typedef enum CopyHeaderChoice
+typedef struct CopyHeaderOption
 {
-	COPY_HEADER_FALSE = 0,
-	COPY_HEADER_TRUE,
-	COPY_HEADER_MATCH,
-} CopyHeaderChoice;
+	bool	match;		/* header line must match actual names? */
+	int		skip_lines;	/* number of lines to skip before data */
+} CopyHeaderOption;
 
 /*
  * Represents where to save input processing errors.  More values to be added
@@ -64,7 +63,7 @@ typedef struct CopyFormatOptions
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
-	CopyHeaderChoice header_line;	/* header line? */
+	CopyHeaderOption header;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
 	int			null_print_len; /* length of same */
 	char	   *null_print_client;	/* same converted to file encoding */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 8d5a06563c4..03b363eaf34 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
 c1	colname with tab: \t
 1	a
 2	b
+-- test multi-line header line feature
+create temp table copytest5 (c1 int);
+copy copytest5 from stdin (format csv, header 2);
+copy copytest5 to stdout (header);
+c1
+1
+2
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
 -- test copy from with a partitioned table
 create table parted_copytest (
 	a int,
@@ -224,7 +247,7 @@ alter table header_copytest add column c text;
 copy header_copytest to stdout with (header match);
 ERROR:  cannot use "match" with HEADER in COPY TO
 copy header_copytest from stdin with (header wrong_choice);
-ERROR:  header requires a Boolean value or "match"
+ERROR:  header requires a Boolean value, integer value, or "match"
 -- works
 copy header_copytest from stdin with (header match);
 copy header_copytest (c, a, b) from stdin with (header match);
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..e514110b13d 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
 ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
+COPY x from stdin with (header -1);
+ERROR:  header requires a Boolean value, non-negativeinteger value, or "match"
+COPY x from stdin with (header 2.5);
+ERROR:  header requires a Boolean value, integer value, or "match"
+COPY x to stdout with (header 2);
+ERROR:  cannot use multi-line header in COPY TO
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
 ERROR:  column "d" specified more than once
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index f0b88a23db8..a1316c73bac 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
 
 copy copytest4 to stdout (header);
 
+-- test multi-line header line feature
+
+create temp table copytest5 (c1 int);
+
+copy copytest5 from stdin (format csv, header 2);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+copy copytest5 to stdout (header);
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
 -- test copy from with a partitioned table
 create table parted_copytest (
 	a int,
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..cef45868db5 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
+COPY x from stdin with (header -1);
+COPY x from stdin with (header 2.5);
+COPY x to stdout with (header 2);
 
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 32d6e718adc..49a76fac9ad 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -521,7 +521,7 @@ CopyFormatOptions
 CopyFromRoutine
 CopyFromState
 CopyFromStateData
-CopyHeaderChoice
+CopyHeaderOption
 CopyInsertMethod
 CopyLogVerbosityChoice
 CopyMethod
-- 
2.39.3

