From 322512cbcf4b2e6c07e6bd8f9d4b2afc2c400193 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Sun, 13 Jul 2025 10:09:57 -0700
Subject: [PATCH 2/2] Custom COPY format.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch-through:
---
 src/backend/commands/Makefile        |   1 +
 src/backend/commands/copy.c          |  73 +++++++++------
 src/backend/commands/copyformat.c    | 134 +++++++++++++++++++++++++++
 src/backend/commands/copyfrom.c      |  26 ++++--
 src/backend/commands/copyfromparse.c |   7 +-
 src/backend/commands/copyto.c        |  28 ++++--
 src/include/commands/copy.h          |  18 +++-
 src/include/commands/copyapi.h       |   5 +
 8 files changed, 244 insertions(+), 48 deletions(-)
 create mode 100644 src/backend/commands/copyformat.c

diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
index cb2fbdc7c60..30693ffba27 100644
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -24,6 +24,7 @@ OBJS = \
 	constraint.o \
 	conversioncmds.o \
 	copy.o \
+	copyformat.o \
 	copyfrom.o \
 	copyfromparse.o \
 	copyto.o \
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index fae9c41db65..4e482955544 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -514,6 +514,8 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		on_error_specified = false;
 	bool		log_verbosity_specified = false;
 	bool		reject_limit_specified = false;
+	bool		csv_mode = false;
+	bool		binary = false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -521,6 +523,8 @@ ProcessCopyOptions(ParseState *pstate,
 		opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions));
 
 	opts_out->file_encoding = -1;
+	opts_out->format = COPY_FORMAT_TEXT;
+	opts_out->format_ext_id = -1;
 
 	/* Extract options from the statement node tree */
 	foreach(option, options)
@@ -535,16 +539,29 @@ ProcessCopyOptions(ParseState *pstate,
 				errorConflictingDefElem(defel, pstate);
 			format_specified = true;
 			if (strcmp(fmt, "text") == 0)
-				 /* default format */ ;
+				opts_out->format = COPY_FORMAT_TEXT;
 			else if (strcmp(fmt, "csv") == 0)
-				opts_out->csv_mode = true;
+			{
+				opts_out->format = COPY_FORMAT_CSV;
+				csv_mode = true;
+			}
 			else if (strcmp(fmt, "binary") == 0)
-				opts_out->binary = true;
+			{
+				opts_out->format = COPY_FORMAT_BINARY;
+				binary = true;
+			}
 			else
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-						 errmsg("COPY format \"%s\" not recognized", fmt),
-						 parser_errposition(pstate, defel->location)));
+			{
+				/* Look up by the format name (fmt), not the option name */
+				opts_out->format_ext_id = GetCopyFormatExtensionId(fmt);
+				if (opts_out->format_ext_id == -1)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("COPY format \"%s\" not recognized", fmt),
+							 parser_errposition(pstate, defel->location)));
+
+				opts_out->format = COPY_FORMAT_EXTENSION;
+			}
 		}
 		else if (strcmp(defel->defname, "freeze") == 0)
 		{
@@ -699,31 +716,31 @@ ProcessCopyOptions(ParseState *pstate,
 	 * Check for incompatible options (must do these three before inserting
 	 * defaults)
 	 */
-	if (opts_out->binary && opts_out->delim)
+	if (binary && opts_out->delim)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("cannot specify %s in BINARY mode", "DELIMITER")));
 
-	if (opts_out->binary && opts_out->null_print)
+	if (binary && opts_out->null_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify %s in BINARY mode", "NULL")));
 
-	if (opts_out->binary && opts_out->default_print)
+	if (binary && opts_out->default_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify %s in BINARY mode", "DEFAULT")));
 
 	/* Set defaults for omitted options */
 	if (!opts_out->delim)
-		opts_out->delim = opts_out->csv_mode ? "," : "\t";
+		opts_out->delim = csv_mode ? "," : "\t";
 
 	if (!opts_out->null_print)
-		opts_out->null_print = opts_out->csv_mode ? "" : "\\N";
+		opts_out->null_print = csv_mode ? "" : "\\N";
 	opts_out->null_print_len = strlen(opts_out->null_print);
 
-	if (opts_out->csv_mode)
+	if (csv_mode)
 	{
 		if (!opts_out->quote)
 			opts_out->quote = "\"";
@@ -771,7 +788,7 @@ ProcessCopyOptions(ParseState *pstate,
 	 * future-proofing.  Likewise we disallow all digits though only octal
 	 * digits are actually dangerous.
 	 */
-	if (!opts_out->csv_mode &&
+	if (!csv_mode &&
 		strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
 			   opts_out->delim[0]) != NULL)
 		ereport(ERROR,
@@ -779,43 +796,43 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
 	/* Check header */
-	if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
+	if (binary && opts_out->header_line != COPY_HEADER_FALSE)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("cannot specify %s in BINARY mode", "HEADER")));
 
 	/* Check quote */
-	if (!opts_out->csv_mode && opts_out->quote != NULL)
+	if (!csv_mode && opts_out->quote != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("COPY %s requires CSV mode", "QUOTE")));
 
-	if (opts_out->csv_mode && strlen(opts_out->quote) != 1)
+	if (csv_mode && strlen(opts_out->quote) != 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("COPY quote must be a single one-byte character")));
 
-	if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0])
+	if (csv_mode && opts_out->delim[0] == opts_out->quote[0])
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("COPY delimiter and quote must be different")));
 
 	/* Check escape */
-	if (!opts_out->csv_mode && opts_out->escape != NULL)
+	if (!csv_mode && opts_out->escape != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 				 errmsg("COPY %s requires CSV mode", "ESCAPE")));
 
-	if (opts_out->csv_mode && strlen(opts_out->escape) != 1)
+	if (csv_mode && strlen(opts_out->escape) != 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("COPY escape must be a single one-byte character")));
 
 	/* Check force_quote */
-	if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all))
+	if (!csv_mode && (opts_out->force_quote || opts_out->force_quote_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -829,8 +846,8 @@ ProcessCopyOptions(ParseState *pstate,
 						"COPY FROM")));
 
 	/* Check force_notnull */
-	if (!opts_out->csv_mode && (opts_out->force_notnull != NIL ||
-								opts_out->force_notnull_all))
+	if (!csv_mode && (opts_out->force_notnull != NIL ||
+					  opts_out->force_notnull_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -845,8 +862,8 @@ ProcessCopyOptions(ParseState *pstate,
 						"COPY TO")));
 
 	/* Check force_null */
-	if (!opts_out->csv_mode && (opts_out->force_null != NIL ||
-								opts_out->force_null_all))
+	if (!csv_mode && (opts_out->force_null != NIL ||
+					  opts_out->force_null_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -870,7 +887,7 @@ ProcessCopyOptions(ParseState *pstate,
 						"NULL")));
 
 	/* Don't allow the CSV quote char to appear in the null string. */
-	if (opts_out->csv_mode &&
+	if (csv_mode &&
 		strchr(opts_out->null_print, opts_out->quote[0]) != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -906,7 +923,7 @@ ProcessCopyOptions(ParseState *pstate,
 							"DEFAULT")));
 
 		/* Don't allow the CSV quote char to appear in the default string. */
-		if (opts_out->csv_mode &&
+		if (csv_mode &&
 			strchr(opts_out->default_print, opts_out->quote[0]) != NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -923,7 +940,7 @@ ProcessCopyOptions(ParseState *pstate,
 					 errmsg("NULL specification and DEFAULT specification cannot be the same")));
 	}
 	/* Check on_error */
-	if (opts_out->binary && opts_out->on_error != COPY_ON_ERROR_STOP)
+	if (binary && opts_out->on_error != COPY_ON_ERROR_STOP)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
diff --git a/src/backend/commands/copyformat.c b/src/backend/commands/copyformat.c
new file mode 100644
index 00000000000..6d1c40833e9
--- /dev/null
+++ b/src/backend/commands/copyformat.c
@@ -0,0 +1,134 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyformat.c
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/copyformat.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/copyapi.h"
+#include "utils/memutils.h"
+
+typedef struct CopyFormatExtensionInfo
+{
+	const char *format_name;	/* name the format was registered under */
+	const CopyToRoutine *to_routine;	/* COPY TO callbacks for the format */
+	const CopyFromRoutine *from_routine;	/* COPY FROM callbacks for the format */
+}			CopyFormatExtensionInfo;
+
+static CopyFormatExtensionInfo * CopyFormatExtensionArray = NULL;	/* registered formats */
+static int	CopyFormatExtensionAssigned = 0;	/* number of entries in use */
+static int	CopyFormatExtensionAllocated = 0;	/* number of entries allocated */
+
+/*
+ * Register new COPY routines for the given format name.
+ *
+ * When fmt is used as a COPY format name, the registered routines will be
+ * used to process the COPY request. See CopyFromRoutine and CopyToRoutine
+ * for how to implement a COPY routine.
+ *
+ * If fmt is already registered, an error is raised.
+ *
+ * fmt is assumed to be a constant string or allocated in storage that will
+ * never be freed; only the pointer is stored, the string is not copied.
+ *
+ * Likewise, from_routine and to_routine are assumed to be allocated in
+ * storage that will never be freed.
+ */
+void
+RegisterCustomCopyFormat(const char *fmt, const CopyToRoutine *to_routine,
+						 const CopyFromRoutine *from_routine)
+{
+	CopyFormatExtensionInfo *info;
+
+	for (int i = 0; i < CopyFormatExtensionAssigned; i++)
+	{
+		if (strcmp(fmt, CopyFormatExtensionArray[i].format_name) == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("custom COPY format \"%s\" already exists",
+							fmt)));
+	}
+
+	/* If there is no array yet, create one; record its size only afterwards */
+	if (CopyFormatExtensionArray == NULL)
+	{
+		CopyFormatExtensionArray = (CopyFormatExtensionInfo *)
+			MemoryContextAlloc(TopMemoryContext,
+							   16 * sizeof(CopyFormatExtensionInfo));
+		CopyFormatExtensionAllocated = 16;
+	}
+
+	/* If there's an array but it's currently full, expand it */
+	if (CopyFormatExtensionAssigned >= CopyFormatExtensionAllocated)
+	{
+		int			newalloc = pg_nextpower2_32(CopyFormatExtensionAssigned + 1);
+
+		CopyFormatExtensionArray = (CopyFormatExtensionInfo *)
+			repalloc(CopyFormatExtensionArray, sizeof(CopyFormatExtensionInfo) * newalloc);
+		CopyFormatExtensionAllocated = newalloc;
+	}
+
+	/* Register the format in the next free slot */
+	info = &CopyFormatExtensionArray[CopyFormatExtensionAssigned++];
+	info->format_name = fmt;
+	info->from_routine = from_routine;
+	info->to_routine = to_routine;
+}
+
+/*
+ * Look up the integer ID of a custom COPY format extension by name,
+ * returning -1 if no format with that name has been registered.
+ *
+ * Within the lifetime of a particular backend, the same name will be mapped
+ * to the same ID every time. IDs of COPY formats that are not loaded during
+ * shared_preload_libraries time are not stable across backends.
+ */
+int
+GetCopyFormatExtensionId(const char *fmt)
+{
+	int			ext_id = -1;
+
+	/* Stop at the first match; registered names are unique */
+	for (int slot = 0; ext_id < 0 && slot < CopyFormatExtensionAssigned; slot++)
+		if (strcmp(CopyFormatExtensionArray[slot].format_name, fmt) == 0)
+			ext_id = slot;
+	return ext_id;
+}
+
+const CopyToRoutine *
+GetCustomCopyToRoutine(int extension_id)
+{
+	const CopyFormatExtensionInfo *info;
+
+	if (extension_id < 0 || extension_id >= CopyFormatExtensionAssigned)
+		return NULL;			/* unknown ID: no error, caller gets NULL */
+	info = &CopyFormatExtensionArray[extension_id];
+	if (info->to_routine == NULL)	/* registered without a COPY TO routine */
+		ereport(ERROR, errcode(ERRCODE_UNDEFINED_OBJECT),
+				errmsg("COPY format \"%s\" does not support COPY TO routine",
+					   info->format_name));
+	return info->to_routine;
+}
+
+const CopyFromRoutine *
+GetCustomCopyFromRoutine(int extension_id)
+{
+	if (extension_id >= CopyFormatExtensionAssigned || extension_id < 0)
+		return NULL;			/* not a registered custom format ID */
+
+	/* Check from_routine, the pointer returned below, not to_routine */
+	if (!CopyFormatExtensionArray[extension_id].from_routine)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("COPY format \"%s\" does not support COPY FROM routine",
+						CopyFormatExtensionArray[extension_id].format_name)));
+	return CopyFormatExtensionArray[extension_id].from_routine;
+}
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 03779838654..fc38caaca24 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -155,13 +155,25 @@ static const CopyFromRoutine CopyFromRoutineBinary = {
 static const CopyFromRoutine *
 CopyFromGetRoutine(const CopyFormatOptions *opts)
 {
-	if (opts->csv_mode)
-		return &CopyFromRoutineCSV;
-	else if (opts->binary)
-		return &CopyFromRoutineBinary;
+	const CopyFromRoutine *routine = NULL;	/* defined even if no case matches */
 
-	/* default is text */
-	return &CopyFromRoutineText;
+	switch (opts->format)
+	{
+		case COPY_FORMAT_CSV:
+			routine = &CopyFromRoutineCSV;
+			break;
+		case COPY_FORMAT_TEXT:
+			routine = &CopyFromRoutineText;
+			break;
+		case COPY_FORMAT_BINARY:
+			routine = &CopyFromRoutineBinary;
+			break;
+		case COPY_FORMAT_EXTENSION:	/* custom format: look up by its ID */
+			routine = GetCustomCopyFromRoutine(opts->format_ext_id);
+			break;
+	}
+
+	return routine;
 }
 
 /* Implementation of the start callback for text and CSV formats */
@@ -263,7 +275,7 @@ CopyFromErrorCallback(void *arg)
 				   edata->cur_relname);
 		return;
 	}
-	if (cstate->opts.binary)
+	if (cstate->opts.format == COPY_FORMAT_BINARY)
 	{
 		/* can't usefully display the data */
 		if (edata->cur_attname)
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 595dc84b172..0b94d8851f3 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -171,7 +171,7 @@ ReceiveCopyBegin(CopyFromState cstate)
 {
 	StringInfoData buf;
 	int			natts = list_length(cstate->attnumlist);
-	int16		format = (cstate->opts.binary ? 1 : 0);
+	int16		format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0);
 	int			i;
 
 	pq_beginmessage(&buf, PqMsg_CopyInResponse);
@@ -754,7 +754,7 @@ bool
 NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
 {
 	return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
-										 cstate->opts.csv_mode);
+										 cstate->opts.format == COPY_FORMAT_CSV);
 }
 
 /*
@@ -782,7 +782,8 @@ NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields
 	bool		done = false;
 
 	/* only available for text or csv input */
-	Assert(!cstate->opts.binary);
+	Assert(cstate->opts.format == COPY_FORMAT_TEXT ||
+		   cstate->opts.format == COPY_FORMAT_CSV);
 
 	/* on input check that the header line is correct if needed */
 	if (edata->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e90fab6d958..842c5efa8d5 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -160,13 +160,25 @@ static const CopyToRoutine CopyToRoutineBinary = {
 static const CopyToRoutine *
 CopyToGetRoutine(const CopyFormatOptions *opts)
 {
-	if (opts->csv_mode)
-		return &CopyToRoutineCSV;
-	else if (opts->binary)
-		return &CopyToRoutineBinary;
+	const CopyToRoutine *routine = NULL;	/* defined even if no case matches */
 
-	/* default is text */
-	return &CopyToRoutineText;
+	switch (opts->format)
+	{
+		case COPY_FORMAT_CSV:
+			routine = &CopyToRoutineCSV;
+			break;
+		case COPY_FORMAT_TEXT:
+			routine = &CopyToRoutineText;
+			break;
+		case COPY_FORMAT_BINARY:
+			routine = &CopyToRoutineBinary;
+			break;
+		case COPY_FORMAT_EXTENSION:	/* custom format: look up by its ID */
+			routine = GetCustomCopyToRoutine(opts->format_ext_id);
+			break;
+	}
+
+	return routine;
 }
 
 /* Implementation of the start callback for text and CSV formats */
@@ -199,7 +211,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 
 			colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
 
-			if (cstate->opts.csv_mode)
+			if (cstate->opts.format == COPY_FORMAT_CSV)
 				CopyAttributeOutCSV(cstate, colname, false);
 			else
 				CopyAttributeOutText(cstate, colname);
@@ -376,7 +388,7 @@ SendCopyBegin(CopyToState cstate)
 {
 	StringInfoData buf;
 	int			natts = list_length(cstate->attnumlist);
-	int16		format = (cstate->opts.binary ? 1 : 0);
+	int16		format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0);
 	int			i;
 
 	pq_beginmessage(&buf, PqMsg_CopyOutResponse);
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 1fd44a132be..73660e694ce 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -49,6 +49,17 @@ typedef enum CopyLogVerbosityChoice
 	COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */
 } CopyLogVerbosityChoice;
 
+/*
+ * Represents the file format processed by COPY command.
+ */
+typedef enum CopyFormatType
+{
+	COPY_FORMAT_TEXT = 0,		/* default; zero so zeroed options mean text */
+	COPY_FORMAT_CSV,
+	COPY_FORMAT_BINARY,
+	COPY_FORMAT_EXTENSION,		/* custom format, see format_ext_id */
+} CopyFormatType;
+
 /*
  * A struct to hold COPY options, in a parsed form. All of these are related
  * to formatting, except for 'freeze', which doesn't really belong here, but
@@ -59,9 +70,10 @@ typedef struct CopyFormatOptions
 	/* parameters from the COPY command */
 	int			file_encoding;	/* file or remote side's character encoding,
 								 * -1 if not specified */
-	bool		binary;			/* binary format? */
+	CopyFormatType format;
+	int			format_ext_id;	/* custom COPY format id, -1 if format is
+								 * built-in format */
 	bool		freeze;			/* freeze rows on loading? */
-	bool		csv_mode;		/* Comma Separated Value format? */
 	int			header_line;	/* number of lines to skip or COPY_HEADER_XXX
 								 * value (see the above) */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
@@ -163,6 +175,8 @@ extern uint64 CopyFrom(CopyFromState cstate);
 
 extern DestReceiver *CreateCopyDestReceiver(void);
 
+extern int	GetCopyFormatExtensionId(const char *fmt);
+
 /*
  * internal prototypes
  */
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 2a2d2f9876b..b9e29dddd5b 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -102,4 +102,9 @@ typedef struct CopyFromRoutine
 	void		(*CopyFromEnd) (CopyFromState cstate);
 } CopyFromRoutine;
 
+extern void RegisterCustomCopyFormat(const char *fmt, const CopyToRoutine *to_routine,
+									 const CopyFromRoutine *from_routine);
+extern const CopyToRoutine *GetCustomCopyToRoutine(int extension_id);
+extern const CopyFromRoutine *GetCustomCopyFromRoutine(int extension_id);
+
 #endif							/* COPYAPI_H */
-- 
2.43.5

