From ce27697f0b15b6b06a658128ba1e88c11bd0512c Mon Sep 17 00:00:00 2001
From: Joel Jacobson <joel@compiler.org>
Date: Sun, 13 Oct 2024 21:01:53 +0200
Subject: [PATCH 5/5] Add "raw" COPY format support for unstructured text data.

This commit introduces a new format option to the COPY command, enabling
the import and export of unstructured text data where each line is treated as a
single field without any delimiters.
---
 doc/src/sgml/ref/copy.sgml           |  98 ++++++++++++-
 src/backend/commands/copy.c          |  39 +++--
 src/backend/commands/copyfrom.c      |   7 +
 src/backend/commands/copyfromparse.c | 204 ++++++++++++++++++++++++++-
 src/backend/commands/copyto.c        |  70 ++++++++-
 src/backend/parser/gram.y            |   8 +-
 src/include/commands/copy.h          |   1 +
 src/include/parser/kwlist.h          |   1 +
 src/test/regress/expected/copy.out   | 123 ++++++++++++++++
 src/test/regress/expected/copy2.out  |  82 ++++++++++-
 src/test/regress/sql/copy.sql        |  70 +++++++++
 src/test/regress/sql/copy2.sql       |  67 ++++++++-
 12 files changed, 741 insertions(+), 29 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f09..06ca632ee3 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -218,8 +218,9 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
      <para>
       Selects the data format to be read or written:
       <literal>text</literal>,
-      <literal>csv</literal> (Comma Separated Values),
-      or <literal>binary</literal>.
+      <literal>CSV</literal> (Comma Separated Values),
+      <literal>binary</literal>,
+      or <literal>raw</literal>
       The default is <literal>text</literal>.
       See <xref linkend="sql-copy-file-formats"/> below for details.
      </para>
@@ -257,7 +258,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       (line) of the file.  The default is a tab character in text format,
       a comma in <literal>CSV</literal> format.
       This must be a single one-byte character.
-      This option is not allowed when using <literal>binary</literal> format.
+      This option is allowed only when using <literal>text</literal> or
+      <literal>CSV</literal> format.
      </para>
     </listitem>
    </varlistentry>
@@ -271,7 +273,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       string in <literal>CSV</literal> format. You might prefer an
       empty string even in text format for cases where you don't want to
       distinguish nulls from empty strings.
-      This option is not allowed when using <literal>binary</literal> format.
+      This option is allowed only when using <literal>text</literal> or
+      <literal>CSV</literal> format.
      </para>
 
      <note>
@@ -294,7 +297,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       is found in the input file, the default value of the corresponding column
       will be used.
       This option is allowed only in <command>COPY FROM</command>, and only when
-      not using <literal>binary</literal> format.
+      using <literal>text</literal> or <literal>CSV</literal> format.
      </para>
     </listitem>
    </varlistentry>
@@ -400,7 +403,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
      </para>
      <para>
       The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
-      when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
+      when the <literal>FORMAT</literal> is <literal>text</literal>,
+      <literal>CSV</literal> or <literal>raw</literal>.
      </para>
      <para>
       A <literal>NOTICE</literal> message containing the ignored row count is
@@ -893,6 +897,88 @@ COPY <replaceable class="parameter">count</replaceable>
 
   </refsect2>
 
+  <refsect2>
+   <title>Raw Format</title>
+
+   <para>
+    This format option is used for importing and exporting files containing
+    unstructured text, where each line is treated as a single field. It is
+    ideal for data that does not conform to a structured, tabular format and
+    lacks delimiters.
+   </para>
+
+   <para>
+    In the <literal>raw</literal> format, each line of the input or output is
+    considered a complete value without any field separation. There are no
+    field delimiters, and all characters are taken literally. There is no
+    special handling for quotes, backslashes, or escape sequences. All
+    characters, including whitespace and special characters, are preserved
+    exactly as they appear in the file. However, it's important to note that
+    the text is still interpreted according to the specified <literal>ENCODING</literal>
+    option or the current client encoding for input, and encoded using the
+    specified <literal>ENCODING</literal> or the current client encoding for output.
+   </para>
+
+   <para>
+    When using this format, the <command>COPY</command> command must specify
+    exactly one column. Specifying multiple columns will result in an error.
+    If the table has multiple columns and no column list is provided, an error
+    will occur.
+   </para>
+
+   <para>
+    The <literal>raw</literal> format does not distinguish a <literal>NULL</literal>
+    value from an empty string. Empty lines are imported as empty strings, not
+    as <literal>NULL</literal> values.
+   </para>
+
+   <para>
+    Encoding works the same as in the <literal>text</literal> and <literal>CSV</literal> formats.
+   </para>
+
+  </refsect2>
+
+  <refsect2>
+   <title>Raw Format</title>
+
+   <para>
+    This format option is used for importing and exporting files containing
+    unstructured text, where each line is treated as a single field. It is
+    ideal for data that does not conform to a structured, tabular format and
+    lacks delimiters.
+   </para>
+
+   <para>
+    In the <literal>raw</literal> format, each line of the input or output is
+    considered a complete value without any field separation. There are no
+    field delimiters, and all characters are taken literally. There is no
+    special handling for quotes, backslashes, or escape sequences. All
+    characters, including whitespace and special characters, are preserved
+    exactly as they appear in the file. However, it's important to note that
+    the text is still interpreted according to the specified <literal>ENCODING</literal>
+    option or the current client encoding for input, and encoded using the
+    specified <literal>ENCODING</literal> or the current client encoding for output.
+   </para>
+
+   <para>
+    When using this format, the <command>COPY</command> command must specify
+    exactly one column. Specifying multiple columns will result in an error.
+    If the table has multiple columns and no column list is provided, an error
+    will occur.
+   </para>
+
+   <para>
+    The <literal>raw</literal> format does not distinguish a <literal>NULL</literal>
+    value from an empty string. Empty lines are imported as empty strings, not
+    as <literal>NULL</literal> values.
+   </para>
+
+   <para>
+    Encoding works the same as in the <literal>text</literal> and <literal>CSV</literal> formats.
+   </para>
+
+  </refsect2>
+
   <refsect2 id="sql-copy-binary-format" xreflabel="Binary Format">
    <title>Binary Format</title>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 493ca5f487..b71161ad99 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -516,6 +516,8 @@ ProcessCopyOptions(ParseState *pstate,
 				opts_out->format = COPY_FORMAT_CSV;
 			else if (strcmp(fmt, "binary") == 0)
 				opts_out->format = COPY_FORMAT_BINARY;
+			else if (strcmp(fmt, "raw") == 0)
+				opts_out->format = COPY_FORMAT_RAW;
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -710,6 +712,12 @@ ProcessCopyOptions(ParseState *pstate,
 			/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 					errmsg("cannot specify %s in BINARY mode", "DELIMITER")));
 
+		if (opts_out->format == COPY_FORMAT_RAW)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+			/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
+					errmsg("cannot specify %s in RAW mode", "DELIMITER")));
+
 		/* Only single-byte delimiter strings are supported. */
 		if (strlen(opts_out->delim) != 1)
 			ereport(ERROR,
@@ -740,11 +748,11 @@ ProcessCopyOptions(ParseState *pstate,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 	}
-	else if (opts_out->format != COPY_FORMAT_BINARY)
-	{
-		/* Set default delimiter */
-		opts_out->delim = opts_out->format == COPY_FORMAT_CSV ? "," : "\t";
-	}
+	/* Set default delimiter */
+	else if (opts_out->format == COPY_FORMAT_CSV)
+		opts_out->delim = ",";
+	else if (opts_out->format == COPY_FORMAT_TEXT)
+		opts_out->delim = "\t";
 
 	/* --- NULL option --- */
 	if (opts_out->null_print)
@@ -754,6 +762,11 @@ ProcessCopyOptions(ParseState *pstate,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					errmsg("cannot specify %s in BINARY mode", "NULL")));
 
+		if (opts_out->format == COPY_FORMAT_RAW)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("cannot specify %s in RAW mode", "NULL")));
+
 		/* Disallow end-of-line characters */
 		if (strchr(opts_out->null_print, '\r') != NULL ||
 			strchr(opts_out->null_print, '\n') != NULL)
@@ -761,11 +774,12 @@ ProcessCopyOptions(ParseState *pstate,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					errmsg("COPY null representation cannot use newline or carriage return")));
 	}
-	else if (opts_out->format != COPY_FORMAT_BINARY)
-	{
-		/* Set default null_print */
-		opts_out->null_print = opts_out->format == COPY_FORMAT_CSV ? "" : "\\N";
-	}
+	/* Set default null_print */
+	else if (opts_out->format == COPY_FORMAT_CSV)
+		opts_out->null_print = "";
+	else if (opts_out->format == COPY_FORMAT_TEXT)
+		opts_out->null_print = "\\N";
+
 	if (opts_out->null_print)
 		opts_out->null_print_len = strlen(opts_out->null_print);
 
@@ -925,6 +939,11 @@ ProcessCopyOptions(ParseState *pstate,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("cannot specify %s in BINARY mode", "DEFAULT")));
 
+		if (opts_out->format == COPY_FORMAT_RAW)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("cannot specify %s in RAW mode", "DEFAULT")));
+
 		/* Assert options have been set (defaults applied if not specified) */
 		Assert(opts_out->delim);
 		Assert(opts_out->quote);
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index f350a4ff97..99dcb00f8a 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1438,6 +1438,13 @@ BeginCopyFrom(ParseState *pstate,
 	/* Generate or convert list of attributes to process */
 	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
 
+	/* Enforce single column requirement for RAW format */
+	if (cstate->opts.format == COPY_FORMAT_RAW &&
+		list_length(cstate->attnumlist) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				errmsg("COPY with format 'raw' must specify exactly one column")));
+
 	num_phys_attrs = tupDesc->natts;
 
 	/* Convert FORCE_NOT_NULL name list to per-column flags, check validity */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 50bb4b7750..2528c6f111 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -7,7 +7,7 @@
  * formats.  The main entry point is NextCopyFrom(), which parses the
  * next input line and returns it as Datums.
  *
- * In text/CSV mode, the parsing happens in multiple stages:
+ * In text/CSV/raw mode, the parsing happens in multiple stages:
  *
  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
  *                1.          2.            3.           4.
@@ -25,7 +25,7 @@
  *    is copied into 'line_buf', with quotes and escape characters still
  *    intact.
  *
- * 4. CopyReadAttributesText/CSV() function takes the input line from
+ * 4. CopyReadAttributesText/CSV/Raw() function takes the input line from
  *    'line_buf', and splits it into fields, unescaping the data as required.
  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
  *    pointers to each field.
@@ -143,8 +143,10 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 /* non-export function prototypes */
 static bool CopyReadLine(CopyFromState cstate);
 static bool CopyReadLineText(CopyFromState cstate);
+static bool CopyReadLineRawText(CopyFromState cstate);
 static int	CopyReadAttributesText(CopyFromState cstate);
 static int	CopyReadAttributesCSV(CopyFromState cstate);
+static int	CopyReadAttributesRaw(CopyFromState cstate);
 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
 									 Oid typioparam, int32 typmod,
 									 bool *isnull);
@@ -732,7 +734,7 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
 }
 
 /*
- * Read raw fields in the next line for COPY FROM in text or csv mode.
+ * Read raw fields in the next line for COPY FROM in text, csv, or raw mode.
  * Return false if no more lines.
  *
  * An internal temporary buffer is returned via 'fields'. It is valid until
@@ -748,7 +750,7 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 	int			fldct;
 	bool		done;
 
-	/* only available for text or csv input */
+	/* only available for text, csv, or raw input */
 	Assert(cstate->opts.format != COPY_FORMAT_BINARY);
 
 	/* on input check that the header line is correct if needed */
@@ -768,8 +770,15 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 
 			if (cstate->opts.format == COPY_FORMAT_CSV)
 				fldct = CopyReadAttributesCSV(cstate);
-			else
+			else if (cstate->opts.format == COPY_FORMAT_TEXT)
 				fldct = CopyReadAttributesText(cstate);
+			else if (cstate->opts.format == COPY_FORMAT_RAW)
+				fldct = CopyReadAttributesRaw(cstate);
+			else
+			{
+				elog(ERROR, "unexpected COPY format: %d", cstate->opts.format);
+				pg_unreachable();
+			}
 
 			if (fldct != list_length(cstate->attnumlist))
 				ereport(ERROR,
@@ -823,8 +832,15 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 	/* Parse the line into de-escaped field values */
 	if (cstate->opts.format == COPY_FORMAT_CSV)
 		fldct = CopyReadAttributesCSV(cstate);
-	else
+	else if (cstate->opts.format == COPY_FORMAT_TEXT)
 		fldct = CopyReadAttributesText(cstate);
+	else if (cstate->opts.format == COPY_FORMAT_RAW)
+		fldct = CopyReadAttributesRaw(cstate);
+	else
+	{
+		elog(ERROR, "unexpected COPY format: %d", cstate->opts.format);
+		pg_unreachable();
+	}
 
 	*fields = cstate->raw_fields;
 	*nfields = fldct;
@@ -1096,7 +1112,10 @@ CopyReadLine(CopyFromState cstate)
 	cstate->line_buf_valid = false;
 
 	/* Parse data and transfer into line_buf */
-	result = CopyReadLineText(cstate);
+	if (cstate->opts.format == COPY_FORMAT_RAW)
+		result = CopyReadLineRawText(cstate);
+	else
+		result = CopyReadLineText(cstate);
 
 	if (result)
 	{
@@ -1462,6 +1481,138 @@ CopyReadLineText(CopyFromState cstate)
 	return result;
 }
 
+/*
+ * CopyReadLineRawText - inner loop of CopyReadLine for raw text mode
+ */
+static bool
+CopyReadLineRawText(CopyFromState cstate)
+{
+	char	   *copy_input_buf;
+	int			input_buf_ptr;
+	int			copy_buf_len;
+	bool		need_data = false;
+	bool		hit_eof = false;
+	bool		result = false;
+
+	/*
+	 * The objective of this loop is to transfer the entire next input line
+	 * into line_buf. We only care for detecting newlines (\r and/or \n).
+	 * All other characters are treated as regular data.
+	 *
+	 * For speed, we try to move data from input_buf to line_buf in chunks
+	 * rather than one character at a time.  input_buf_ptr points to the next
+	 * character to examine; any characters from input_buf_index to
+	 * input_buf_ptr have been determined to be part of the line, but not yet
+	 * transferred to line_buf.
+	 *
+	 * For a little extra speed within the loop, we copy input_buf and
+	 * input_buf_len into local variables.
+	 */
+	copy_input_buf = cstate->input_buf;
+	input_buf_ptr = cstate->input_buf_index;
+	copy_buf_len = cstate->input_buf_len;
+
+	for (;;)
+	{
+		int			prev_raw_ptr;
+		char		c;
+
+		/*
+		* Load more data if needed.
+		*/
+		if (input_buf_ptr >= copy_buf_len || need_data)
+		{
+			REFILL_LINEBUF;
+
+			CopyLoadInputBuf(cstate);
+			/* update our local variables */
+			hit_eof = cstate->input_reached_eof;
+			input_buf_ptr = cstate->input_buf_index;
+			copy_buf_len = cstate->input_buf_len;
+
+			/*
+			* If we are completely out of data, break out of the loop,
+			* reporting EOF.
+			*/
+			if (INPUT_BUF_BYTES(cstate) <= 0)
+			{
+				result = true;
+				break;
+			}
+			need_data = false;
+		}
+
+		/* OK to fetch a character */
+		prev_raw_ptr = input_buf_ptr;
+		c = copy_input_buf[input_buf_ptr++];
+
+		/* Process \r */
+		if (c == '\r')
+		{
+			/* Check for \r\n on first line, _and_ handle \r\n. */
+			if (cstate->eol_type == EOL_UNKNOWN ||
+				cstate->eol_type == EOL_CRNL)
+			{
+				/*
+				 * If need more data, go back to loop top to load it.
+				 *
+				 * Note that if we are at EOF, c will wind up as '\0' because
+				 * of the guaranteed pad of input_buf.
+				 */
+				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+
+				/* get next char */
+				c = copy_input_buf[input_buf_ptr];
+
+				if (c == '\n')
+				{
+					input_buf_ptr++;	/* eat newline */
+					cstate->eol_type = EOL_CRNL;	/* in case not set yet */
+				}
+				else
+				{
+					if (cstate->eol_type == EOL_CRNL)
+						ereport(ERROR,
+								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+								errmsg("inconsistent newline style")));
+					/*
+					* if we got here, it is the first line and we didn't find
+					* \n, so don't consume the peeked character
+					*/
+					cstate->eol_type = EOL_CR;
+				}
+			}
+			else if (cstate->eol_type == EOL_NL)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						errmsg("inconsistent newline style")));
+			/* If reach here, we have found the line terminator */
+			break;
+		}
+
+		/* Process \n */
+		if (c == '\n')
+		{
+			if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						errmsg("inconsistent newline style")));
+			cstate->eol_type = EOL_NL;	/* in case not set yet */
+			/* If reach here, we have found the line terminator */
+			break;
+		}
+
+		/* All other characters are treated as regular data */
+	}							/* end of outer loop */
+
+	/*
+	 * Transfer any still-uncopied data to line_buf.
+	 */
+	REFILL_LINEBUF;
+
+	return result;
+}
+
 /*
  *	Return decimal value for a hexadecimal digit
  */
@@ -1938,6 +2089,45 @@ endfield:
 	return fieldno;
 }
 
+/*
+ * Parse the current line as a single attribute for the "raw" COPY format.
+ * No parsing, quoting, or escaping is performed.
+ * Empty lines are treated as empty strings, not NULL.
+ */
+static int
+CopyReadAttributesRaw(CopyFromState cstate)
+{
+	/* Enforce single column requirement */
+	if (cstate->max_fields != 1)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("COPY with format 'raw' requires exactly one column")));
+	}
+
+	resetStringInfo(&cstate->attribute_buf);
+
+	/*
+	 * The attribute will certainly not be longer than the input
+	 * data line, so we can just force attribute_buf to be large enough and
+	 * then transfer data without any checks for enough space.  We need to do
+	 * it this way because enlarging attribute_buf mid-stream would invalidate
+	 * pointers already stored into cstate->raw_fields[].
+	 */
+	if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
+		enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
+
+	/* Copy the entire line into attribute_buf */
+	memcpy(cstate->attribute_buf.data, cstate->line_buf.data,
+		   cstate->line_buf.len);
+	cstate->attribute_buf.data[cstate->line_buf.len] = '\0';
+	cstate->attribute_buf.len = cstate->line_buf.len;
+
+	/* Assign the single field to raw_fields[0] */
+	cstate->raw_fields[0] = cstate->attribute_buf.data;
+
+	return 1;
+}
 
 /*
  * Read a binary attribute
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 78531ae846..99fd68a483 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -113,6 +113,7 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
 static void CopyAttributeOutText(CopyToState cstate, const char *string);
 static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
 								bool use_quote);
+static void CopyAttributeOutRaw(CopyToState cstate, const char *string);
 
 /* Low-level communications functions */
 static void SendCopyBegin(CopyToState cstate);
@@ -570,6 +571,13 @@ BeginCopyTo(ParseState *pstate,
 	/* Generate or convert list of attributes to process */
 	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
 
+	/* Enforce single column requirement for RAW format */
+	if (cstate->opts.format == COPY_FORMAT_RAW &&
+		list_length(cstate->attnumlist) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("COPY with format 'raw' must specify exactly one column")));
+
 	num_phys_attrs = tupDesc->natts;
 
 	/* Convert FORCE_QUOTE name list to per-column flags, check validity */
@@ -835,8 +843,10 @@ DoCopyTo(CopyToState cstate)
 
 				if (cstate->opts.format == COPY_FORMAT_CSV)
 					CopyAttributeOutCSV(cstate, colname, false);
-				else
+				else if (cstate->opts.format == COPY_FORMAT_TEXT)
 					CopyAttributeOutText(cstate, colname);
+				else if (cstate->opts.format == COPY_FORMAT_RAW)
+					CopyAttributeOutRaw(cstate, colname);
 			}
 
 			CopySendEndOfRow(cstate);
@@ -917,7 +927,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 	/* Make sure the tuple is fully deconstructed */
 	slot_getallattrs(slot);
 
-	if (cstate->opts.format != COPY_FORMAT_BINARY)
+	if (cstate->opts.format == COPY_FORMAT_TEXT ||
+		cstate->opts.format == COPY_FORMAT_CSV)
 	{
 		bool		need_delim = false;
 
@@ -945,7 +956,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 			}
 		}
 	}
-	else
+	else if (cstate->opts.format == COPY_FORMAT_BINARY)
 	{
 		foreach_int(attnum, cstate->attnumlist)
 		{
@@ -965,6 +976,37 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 			}
 		}
 	}
+	else if (cstate->opts.format == COPY_FORMAT_RAW)
+	{
+		int			attnum;
+		Datum		value;
+		bool		isnull;
+
+		/* Ensure only one column is being copied */
+		if (list_length(cstate->attnumlist) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("COPY with format 'raw' must specify exactly one column")));
+
+		attnum = linitial_int(cstate->attnumlist);
+		value = slot->tts_values[attnum - 1];
+		isnull = slot->tts_isnull[attnum - 1];
+
+		if (!isnull)
+		{
+			char	   *string = OutputFunctionCall(&out_functions[attnum - 1],
+													value);
+			CopyAttributeOutRaw(cstate, string);
+		}
+		/* For RAW format, we don't send anything for NULL values */
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("Unsupported COPY format")));
+	}
+
 
 	CopySendEndOfRow(cstate);
 
@@ -1219,6 +1261,28 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 	}
 }
 
+/*
+ * Send text representation of one attribute for RAW format.
+ */
+static void
+CopyAttributeOutRaw(CopyToState cstate, const char *string)
+{
+	const char *ptr;
+
+	/* Ensure the format is RAW */
+	Assert(cstate->opts.format == COPY_FORMAT_RAW);
+
+	/* Ensure exactly one column is being processed */
+	Assert(list_length(cstate->attnumlist) == 1);
+
+	if (cstate->need_transcoding)
+		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	else
+		ptr = string;
+
+	CopySendString(cstate, ptr);
+}
+
 /*
  * copy_dest_startup --- executor startup
  */
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4aa8646af7..0d0a3ad7ff 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -768,7 +768,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 	QUOTE QUOTES
 
-	RANGE READ REAL REASSIGN RECURSIVE REF_P REFERENCES REFERENCING
+	RANGE RAW READ REAL REASSIGN RECURSIVE REF_P REFERENCES REFERENCING
 	REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA
 	RESET RESTART RESTRICT RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP
 	ROUTINE ROUTINES ROW ROWS RULE
@@ -3513,6 +3513,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("encoding", (Node *) makeString($2), @1);
 				}
+			| RAW
+				{
+					$$ = makeDefElem("format", (Node *) makeString("raw"), @1);
+				}
 		;
 
 /* The following exist for backward compatibility with very old versions */
@@ -17771,6 +17775,7 @@ unreserved_keyword:
 			| QUOTE
 			| QUOTES
 			| RANGE
+			| RAW
 			| READ
 			| REASSIGN
 			| RECURSIVE
@@ -18398,6 +18403,7 @@ bare_label_keyword:
 			| QUOTE
 			| QUOTES
 			| RANGE
+			| RAW
 			| READ
 			| REAL
 			| REASSIGN
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index e700fd01b5..04f7548ef4 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -59,6 +59,7 @@ typedef enum CopyFormat
 	COPY_FORMAT_TEXT,
 	COPY_FORMAT_BINARY,
 	COPY_FORMAT_CSV,
+	COPY_FORMAT_RAW,
 } CopyFormat;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 899d64ad55..02cd28c750 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -360,6 +360,7 @@ PG_KEYWORD("publication", PUBLICATION, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("quote", QUOTE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("quotes", QUOTES, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("range", RANGE, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("raw", RAW, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("read", READ, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("real", REAL, COL_NAME_KEYWORD, BARE_LABEL)
 PG_KEYWORD("reassign", REASSIGN, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index f554d42c84..b11cabd993 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -325,3 +325,126 @@ SELECT tableoid::regclass, id % 2 = 0 is_even, count(*) from parted_si GROUP BY
 (2 rows)
 
 DROP TABLE parted_si;
+-- Test COPY FORMAT raw
+\set filename :abs_builddir '/results/copy_raw_test.data'
+CREATE TABLE copy_raw_test (id SERIAL PRIMARY KEY, col text);
+INSERT INTO copy_raw_test (col) VALUES
+(E'",\\'), (E'\\.'), (NULL), (''), (' '), (E'\n'), ('test');
+COPY copy_raw_test (col) TO :'filename' (FORMAT raw);
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+ col  | ?column? 
+------+----------
+ ",\  | f
+ \.   | f
+      | f
+      | f
+      | f
+      | f
+      | f
+ test | f
+(8 rows)
+
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' RAW;
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+ col  | ?column? 
+------+----------
+ ",\  | f
+ \.   | f
+      | f
+      | f
+      | f
+      | f
+      | f
+ test | f
+(8 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\qecho -n '\n'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r\n'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\qecho -n '\r'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+  col  | ?column? 
+-------+----------
+ line1 | f
+ line2 | f
+(2 rows)
+
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 626a437d40..f38c1d6b00 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -88,8 +88,12 @@ LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
 -- incorrect options
 COPY x to stdout (format BINARY, delimiter ',');
 ERROR:  cannot specify DELIMITER in BINARY mode
+COPY x to stdout (format RAW, delimiter ',');
+ERROR:  cannot specify DELIMITER in RAW mode
 COPY x to stdout (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
+COPY x to stdout (format RAW, null 'x');
+ERROR:  cannot specify NULL in RAW mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
@@ -100,6 +104,10 @@ COPY x to stdout (format TEXT, force_quote(a));
 ERROR:  COPY FORCE_QUOTE requires CSV mode
 COPY x to stdout (format TEXT, force_quote *);
 ERROR:  COPY FORCE_QUOTE requires CSV mode
+COPY x to stdout (format RAW, force_quote(a));
+ERROR:  COPY FORCE_QUOTE requires CSV mode
+COPY x to stdout (format RAW, force_quote *);
+ERROR:  COPY FORCE_QUOTE requires CSV mode
 COPY x from stdin (format CSV, force_quote(a));
 ERROR:  COPY FORCE_QUOTE cannot be used with COPY FROM
 COPY x from stdin (format CSV, force_quote *);
@@ -108,6 +116,10 @@ COPY x from stdin (format TEXT, force_not_null(a));
 ERROR:  COPY FORCE_NOT_NULL requires CSV mode
 COPY x from stdin (format TEXT, force_not_null *);
 ERROR:  COPY FORCE_NOT_NULL requires CSV mode
+COPY x from stdin (format RAW, force_not_null(a));
+ERROR:  COPY FORCE_NOT_NULL requires CSV mode
+COPY x from stdin (format RAW, force_not_null *);
+ERROR:  COPY FORCE_NOT_NULL requires CSV mode
 COPY x to stdout (format CSV, force_not_null(a));
 ERROR:  COPY FORCE_NOT_NULL cannot be used with COPY TO
 COPY x to stdout (format CSV, force_not_null *);
@@ -116,6 +128,10 @@ COPY x from stdin (format TEXT, force_null(a));
 ERROR:  COPY FORCE_NULL requires CSV mode
 COPY x from stdin (format TEXT, force_null *);
 ERROR:  COPY FORCE_NULL requires CSV mode
+COPY x from stdin (format RAW, force_null(a));
+ERROR:  COPY FORCE_NULL requires CSV mode
+COPY x from stdin (format RAW, force_null *);
+ERROR:  COPY FORCE_NULL requires CSV mode
 COPY x to stdout (format CSV, force_null(a));
 ERROR:  COPY FORCE_NULL cannot be used with COPY TO
 COPY x to stdout (format CSV, force_null *);
@@ -858,9 +874,11 @@ select id, text_value, ts_value from copy_default;
 (2 rows)
 
 truncate copy_default;
--- DEFAULT cannot be used in binary mode
+-- DEFAULT cannot be used in binary or raw mode
 copy copy_default from stdin with (format binary, default '\D');
 ERROR:  cannot specify DEFAULT in BINARY mode
+copy copy_default from stdin with (format raw, default '\D');
+ERROR:  cannot specify DEFAULT in RAW mode
 -- DEFAULT cannot be new line nor carriage return
 copy copy_default from stdin with (default E'\n');
 ERROR:  COPY default representation cannot use newline or carriage return
@@ -929,3 +947,65 @@ truncate copy_default;
 -- DEFAULT cannot be used in COPY TO
 copy (select 1 as test) TO stdout with (default '\D');
 ERROR:  COPY DEFAULT cannot be used with COPY TO
+--
+-- Test COPY FORMAT errors
+--
+\getenv abs_builddir PG_ABS_BUILDDIR
+\set filename :abs_builddir '/results/copy_raw_test.data'
+-- Test single column requirement
+CREATE TABLE copy_raw_test_errors (col1 text, col2 text);
+COPY copy_raw_test_errors TO :'filename' (FORMAT raw);
+ERROR:  COPY with format 'raw' must specify exactly one column
+COPY copy_raw_test_errors (col1, col2) TO :'filename' (FORMAT raw);
+ERROR:  COPY with format 'raw' must specify exactly one column
+COPY copy_raw_test_errors FROM :'filename' (FORMAT raw);
+ERROR:  COPY with format 'raw' must specify exactly one column
+COPY copy_raw_test_errors (col1, col2) FROM :'filename' (FORMAT raw);
+ERROR:  COPY with format 'raw' must specify exactly one column
+-- Test inconsistent newline style
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\qecho -n '\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+ERROR:  inconsistent newline style
+CONTEXT:  COPY copy_raw_test_errors, line 2
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+ERROR:  inconsistent newline style
+CONTEXT:  COPY copy_raw_test_errors, line 2
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+ERROR:  inconsistent newline style
+CONTEXT:  COPY copy_raw_test_errors, line 2
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r'
+\qecho -n line3
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+ERROR:  inconsistent newline style
+CONTEXT:  COPY copy_raw_test_errors, line 2
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\qecho -n '\r\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+ERROR:  inconsistent newline style
+CONTEXT:  COPY copy_raw_test_errors, line 2
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index f1699b66b0..6333af3a90 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -348,3 +348,73 @@ COPY parted_si(id, data) FROM :'filename';
 SELECT tableoid::regclass, id % 2 = 0 is_even, count(*) from parted_si GROUP BY 1, 2 ORDER BY 1;
 
 DROP TABLE parted_si;
+
+-- Test COPY FORMAT raw
+\set filename :abs_builddir '/results/copy_raw_test.data'
+CREATE TABLE copy_raw_test (id SERIAL PRIMARY KEY, col text);
+INSERT INTO copy_raw_test (col) VALUES
+(E'",\\'), (E'\\.'), (NULL), (''), (' '), (E'\n'), ('test');
+COPY copy_raw_test (col) TO :'filename' (FORMAT raw);
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' RAW;
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\qecho -n '\n'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r\n'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\qecho -n '\r'
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\o
+TRUNCATE copy_raw_test;
+COPY copy_raw_test (col) FROM :'filename' (FORMAT raw);
+SELECT col, col IS NULL FROM copy_raw_test ORDER BY id;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 3458d287f2..790793b9b8 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -71,19 +71,27 @@ COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x to stdout (format BINARY, delimiter ',');
+COPY x to stdout (format RAW, delimiter ',');
 COPY x to stdout (format BINARY, null 'x');
+COPY x to stdout (format RAW, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
 COPY x from stdin (on_error unsupported);
 COPY x to stdout (format TEXT, force_quote(a));
 COPY x to stdout (format TEXT, force_quote *);
+COPY x to stdout (format RAW, force_quote(a));
+COPY x to stdout (format RAW, force_quote *);
 COPY x from stdin (format CSV, force_quote(a));
 COPY x from stdin (format CSV, force_quote *);
 COPY x from stdin (format TEXT, force_not_null(a));
 COPY x from stdin (format TEXT, force_not_null *);
+COPY x from stdin (format RAW, force_not_null(a));
+COPY x from stdin (format RAW, force_not_null *);
 COPY x to stdout (format CSV, force_not_null(a));
 COPY x to stdout (format CSV, force_not_null *);
 COPY x from stdin (format TEXT, force_null(a));
 COPY x from stdin (format TEXT, force_null *);
+COPY x from stdin (format RAW, force_null(a));
+COPY x from stdin (format RAW, force_null *);
 COPY x to stdout (format CSV, force_null(a));
 COPY x to stdout (format CSV, force_null *);
 COPY x to stdout (format BINARY, on_error unsupported);
@@ -636,8 +644,9 @@ select id, text_value, ts_value from copy_default;
 
 truncate copy_default;
 
--- DEFAULT cannot be used in binary mode
+-- DEFAULT cannot be used in binary or raw mode
 copy copy_default from stdin with (format binary, default '\D');
+copy copy_default from stdin with (format raw, default '\D');
 
 -- DEFAULT cannot be new line nor carriage return
 copy copy_default from stdin with (default E'\n');
@@ -707,3 +716,59 @@ truncate copy_default;
 
 -- DEFAULT cannot be used in COPY TO
 copy (select 1 as test) TO stdout with (default '\D');
+
+--
+-- Test COPY FORMAT errors
+--
+
+\getenv abs_builddir PG_ABS_BUILDDIR
+\set filename :abs_builddir '/results/copy_raw_test.data'
+
+-- Test single column requirement
+CREATE TABLE copy_raw_test_errors (col1 text, col2 text);
+COPY copy_raw_test_errors TO :'filename' (FORMAT raw);
+COPY copy_raw_test_errors (col1, col2) TO :'filename' (FORMAT raw);
+COPY copy_raw_test_errors FROM :'filename' (FORMAT raw);
+COPY copy_raw_test_errors (col1, col2) FROM :'filename' (FORMAT raw);
+
+-- Test inconsistent newline style
+\o :filename
+\qecho -n line1
+\qecho -n '\r'
+\qecho -n line2
+\qecho -n '\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+
+\o :filename
+\qecho -n line1
+\qecho -n '\r\n'
+\qecho -n line2
+\qecho -n '\r'
+\qecho -n line3
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
+
+\o :filename
+\qecho -n line1
+\qecho -n '\n'
+\qecho -n line2
+\qecho -n '\r\n'
+\o
+COPY copy_raw_test_errors (col1) FROM :'filename' (FORMAT raw);
-- 
2.45.1

