Hi,

The server still supports the old protocol version 2. Protocol version 3 was introduced in PostgreSQL 7.4, so there shouldn't be many clients around anymore that don't support it.

COPY FROM STDIN is particularly problematic with the old protocol, because the end-of-copy can only be detected by the \. marker. So the server has to read the input one byte at a time, and check for \. as it goes. At [1], I'm working on a patch to change the way the encoding conversion is performed in COPY FROM, so that we convert the data in larger chunks, before scanning the input for line boundaries. We can't do that safely in the old protocol.

I propose that we remove server support for COPY FROM STDIN with protocol version 2, per attached patch. Even if we could still support it, it would be a very rarely used and tested codepath, prone to bugs. Perhaps we could remove support for the old protocol altogether, but I'm not proposing that we go that far just yet.

[1] https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01%40iki.fi

- Heikki
>From 21ce10dd2bf43c6924645bff4d40d18663835dcf Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Wed, 3 Feb 2021 17:40:47 +0200
Subject: [PATCH 1/1] Remove support for COPY FROM with protocol version 2.

I'm working on a patch to refactor the way the encoding conversion is
performed, so that we convert the data in larger chunks, before scanning
the input for line boundaries. We can't do that, if we cannot safely try
to read ahead data past the end-of-copy marker. With the old protocol
gone, we can safely read as much as we want.
---
 src/backend/commands/copyfrom.c          |   7 -
 src/backend/commands/copyfromparse.c     | 162 +++++++----------------
 src/backend/commands/copyto.c            |   2 +-
 src/include/commands/copyfrom_internal.h |   5 +-
 4 files changed, 50 insertions(+), 126 deletions(-)

diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index c39cc736ed2..6d43d056cca 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1125,13 +1125,6 @@ CopyFrom(CopyFromState cstate)
 
 	MemoryContextSwitchTo(oldcontext);
 
-	/*
-	 * In the old protocol, tell pqcomm that we can process normal protocol
-	 * messages again.
-	 */
-	if (cstate->copy_src == COPY_OLD_FE)
-		pq_endmsgread();
-
 	/* Execute AFTER STATEMENT insertion triggers */
 	ExecASInsertTriggers(estate, target_resultRelInfo, cstate->transition_capture);
 
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 4c74067f849..e8497cbdf00 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -46,21 +46,6 @@
  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
  */
 
-/*
- * This keeps the character read at the top of the loop in the buffer
- * even if there is more than one read-ahead.
- */
-#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
-if (1) \
-{ \
-	if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
-	{ \
-		raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
-		need_data = true; \
-		continue; \
-	} \
-} else ((void) 0)
-
 /* This consumes the remainder of the buffer and breaks */
 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 if (1) \
@@ -118,7 +103,7 @@ static int	CopyGetData(CopyFromState cstate, void *databuf,
 						int minread, int maxread);
 static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
 static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
-static bool CopyLoadRawBuf(CopyFromState cstate);
+static bool CopyLoadRawBuf(CopyFromState cstate, int minread);
 static int	CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
 
 void
@@ -144,14 +129,9 @@ ReceiveCopyBegin(CopyFromState cstate)
 	else
 	{
 		/* old way */
-		if (cstate->opts.binary)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("COPY BINARY is not supported to stdout or from stdin")));
-		pq_putemptymessage('G');
-		/* any error in old protocol will make us lose sync */
-		pq_startmsgread();
-		cstate->copy_src = COPY_OLD_FE;
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY FROM STDIN is not supported in protocol version 2")));
 	}
 	/* We *must* flush here to ensure FE knows it can send. */
 	pq_flush();
@@ -225,27 +205,9 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not read from COPY file: %m")));
-			if (bytesread == 0)
+			if (bytesread < maxread)
 				cstate->reached_eof = true;
 			break;
-		case COPY_OLD_FE:
-
-			/*
-			 * We cannot read more than minread bytes (which in practice is 1)
-			 * because old protocol doesn't have any clear way of separating
-			 * the COPY stream from following data.  This is slow, but not any
-			 * slower than the code path was originally, and we don't care
-			 * much anymore about the performance of old protocol.
-			 */
-			if (pq_getbytes((char *) databuf, minread))
-			{
-				/* Only a \. terminator is legal EOF in old protocol */
-				ereport(ERROR,
-						(errcode(ERRCODE_CONNECTION_FAILURE),
-						 errmsg("unexpected EOF on client connection with an open transaction")));
-			}
-			bytesread = minread;
-			break;
 		case COPY_NEW_FE:
 			while (maxread > 0 && bytesread < minread && !cstate->reached_eof)
 			{
@@ -312,6 +274,8 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
 			break;
 		case COPY_CALLBACK:
 			bytesread = cstate->data_source_cb(databuf, minread, maxread);
+			if (bytesread < minread)
+				cstate->reached_eof = true;
 			break;
 	}
 
@@ -363,14 +327,13 @@ CopyGetInt16(CopyFromState cstate, int16 *val)
 /*
  * CopyLoadRawBuf loads some more data into raw_buf
  *
- * Returns true if able to obtain at least one more byte, else false.
+ * Returns true if able to obtain at least 'minread' bytes, else false.
  *
  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
- * of the buffer and then we load more data after that.  This case occurs only
- * when a multibyte character crosses a bufferload boundary.
+ * of the buffer and then we load more data after that.
  */
 static bool
-CopyLoadRawBuf(CopyFromState cstate)
+CopyLoadRawBuf(CopyFromState cstate, int minread)
 {
 	int			nbytes = RAW_BUF_BYTES(cstate);
 	int			inbytes;
@@ -381,14 +344,15 @@ CopyLoadRawBuf(CopyFromState cstate)
 				nbytes);
 
 	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
-						  1, RAW_BUF_SIZE - nbytes);
+						  minread, RAW_BUF_SIZE - nbytes);
 	nbytes += inbytes;
 	cstate->raw_buf[nbytes] = '\0';
 	cstate->raw_buf_index = 0;
 	cstate->raw_buf_len = nbytes;
 	cstate->bytes_processed += nbytes;
 	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
-	return (inbytes > 0);
+
+	return (inbytes >= minread);
 }
 
 /*
@@ -423,7 +387,7 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
 			/* Load more data if buffer is empty. */
 			if (RAW_BUF_BYTES(cstate) == 0)
 			{
-				if (!CopyLoadRawBuf(cstate))
+				if (!CopyLoadRawBuf(cstate, 1))
 					break;		/* EOF */
 			}
 
@@ -619,21 +583,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 		if (fld_count == -1)
 		{
 			/*
-			 * Received EOF marker.  In a V3-protocol copy, wait for the
-			 * protocol-level EOF, and complain if it doesn't come
-			 * immediately.  This ensures that we correctly handle CopyFail,
-			 * if client chooses to send that now.
+			 * Received EOF marker.  Wait for the protocol-level EOF, and
+			 * complain if it doesn't come immediately.  This ensures that we
+			 * correctly handle CopyFail, if client chooses to send that now.
 			 *
-			 * Note that we MUST NOT try to read more data in an old-protocol
-			 * copy, since there is no protocol-level EOF marker then.  We
-			 * could go either way for copy from file, but choose to throw
-			 * error if there's data after the EOF marker, for consistency
-			 * with the new-protocol case.
+			 * When copying from file, we could continue reading like we do in
+			 * text mode, but we choose to throw error if there's data after
+			 * the EOF marker, for consistency with the V3-protocol case.
 			 */
 			char		dummy;
 
-			if (cstate->copy_src != COPY_OLD_FE &&
-				CopyReadBinaryData(cstate, &dummy, 1) > 0)
+			if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
 				ereport(ERROR,
 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
 						 errmsg("received copy data after EOF marker")));
@@ -717,7 +677,7 @@ CopyReadLine(CopyFromState cstate)
 			do
 			{
 				cstate->raw_buf_index = cstate->raw_buf_len;
-			} while (CopyLoadRawBuf(cstate));
+			} while (CopyLoadRawBuf(cstate, 1));
 		}
 	}
 	else
@@ -786,7 +746,6 @@ CopyReadLineText(CopyFromState cstate)
 	char	   *copy_raw_buf;
 	int			raw_buf_ptr;
 	int			copy_buf_len;
-	bool		need_data = false;
 	bool		hit_eof = false;
 	bool		result = false;
 	char		mblen_str[2];
@@ -840,38 +799,41 @@ CopyReadLineText(CopyFromState cstate)
 		char		c;
 
 		/*
-		 * Load more data if needed.  Ideally we would just force four bytes
-		 * of read-ahead and avoid the many calls to
-		 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
-		 * does not allow us to read too far ahead or we might read into the
-		 * next data, so we read-ahead only as far we know we can.  One
-		 * optimization would be to read-ahead four byte here if
-		 * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
-		 * considering the size of the buffer.
+		 * Load more data if needed.
+		 *
+		 * We look ahead max three bytes in the code below (for the sequence
+		 * \.<CR><NL>).  Make sure we have at least four bytes in the buffer,
+		 * so that the rest of the code in the loop can just assume that the
+		 * data is in the buffer.  Note that we always guarantee that there is
+		 * one \0 in the buffer, after last valid byte; the lookahead code
+		 * below relies on that.
 		 */
-		if (raw_buf_ptr >= copy_buf_len || need_data)
+#define COPY_READ_LINE_LOOKAHEAD	4
+		if (raw_buf_ptr + COPY_READ_LINE_LOOKAHEAD >= copy_buf_len)
 		{
-			REFILL_LINEBUF;
+			if (!hit_eof)
+			{
+				REFILL_LINEBUF;
 
-			/*
-			 * Try to read some more data.  This will certainly reset
-			 * raw_buf_index to zero, and raw_buf_ptr must go with it.
-			 */
-			if (!CopyLoadRawBuf(cstate))
-				hit_eof = true;
-			raw_buf_ptr = 0;
-			copy_buf_len = cstate->raw_buf_len;
+				/*
+				 * Try to read some more data.  This will certainly reset
+				 * raw_buf_index to zero, and raw_buf_ptr must go with it.
+				 */
+				if (!CopyLoadRawBuf(cstate, COPY_READ_LINE_LOOKAHEAD))
+					hit_eof = true;
+				raw_buf_ptr = 0;
+				copy_buf_len = cstate->raw_buf_len;
+			}
 
 			/*
 			 * If we are completely out of data, break out of the loop,
 			 * reporting EOF.
 			 */
-			if (copy_buf_len <= 0)
+			if (copy_buf_len - raw_buf_ptr <= 0)
 			{
 				result = true;
 				break;
 			}
-			need_data = false;
 		}
 
 		/* OK to fetch a character */
@@ -880,20 +842,6 @@ CopyReadLineText(CopyFromState cstate)
 
 		if (cstate->opts.csv_mode)
 		{
-			/*
-			 * If character is '\\' or '\r', we may need to look ahead below.
-			 * Force fetch of the next character if we don't already have it.
-			 * We need to do this before changing CSV state, in case one of
-			 * these characters is also the quote or escape character.
-			 *
-			 * Note: old-protocol does not like forced prefetch, but it's OK
-			 * here since we cannot validly be at EOF.
-			 */
-			if (c == '\\' || c == '\r')
-			{
-				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
-			}
-
 			/*
 			 * Dealing with quotes and escapes here is mildly tricky. If the
 			 * quote char is also the escape char, there's no problem - we
@@ -927,14 +875,9 @@ CopyReadLineText(CopyFromState cstate)
 				cstate->eol_type == EOL_CRNL)
 			{
 				/*
-				 * If need more data, go back to loop top to load it.
-				 *
-				 * Note that if we are at EOF, c will wind up as '\0' because
-				 * of the guaranteed pad of raw_buf.
+				 * Look at the next character.  If we're at EOF, c2 will wind up as
+				 * '\0' because of the guaranteed pad of raw_buf.
 				 */
-				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
-
-				/* get next char */
 				c = copy_raw_buf[raw_buf_ptr];
 
 				if (c == '\n')
@@ -1000,7 +943,6 @@ CopyReadLineText(CopyFromState cstate)
 		{
 			char		c2;
 
-			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 			IF_NEED_REFILL_AND_EOF_BREAK(0);
 
 			/* -----
@@ -1015,15 +957,8 @@ CopyReadLineText(CopyFromState cstate)
 			{
 				raw_buf_ptr++;	/* consume the '.' */
 
-				/*
-				 * Note: if we loop back for more data here, it does not
-				 * matter that the CSV state change checks are re-executed; we
-				 * will come back here with no important state changed.
-				 */
 				if (cstate->eol_type == EOL_CRNL)
 				{
-					/* Get the next character */
-					IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 					/* if hit_eof, c2 will become '\0' */
 					c2 = copy_raw_buf[raw_buf_ptr++];
 
@@ -1047,8 +982,6 @@ CopyReadLineText(CopyFromState cstate)
 					}
 				}
 
-				/* Get the next character */
-				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 				/* if hit_eof, c2 will become '\0' */
 				c2 = copy_raw_buf[raw_buf_ptr++];
 
@@ -1126,7 +1059,6 @@ not_end_of_copy:
 			mblen_str[0] = c;
 			mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
 
-			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
 			IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
 			raw_buf_ptr += mblen - 1;
 		}
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e04ec1e331b..edbd5d83a0f 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -162,7 +162,7 @@ SendCopyBegin(CopyToState cstate)
 		if (cstate->opts.binary)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("COPY BINARY is not supported to stdout or from stdin")));
+					 errmsg("COPY BINARY is not supported to stdout or from stdin in protocol version 2")));
 		pq_putemptymessage('H');
 		/* grottiness needed for old COPY OUT protocol */
 		pq_startcopyout();
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index e37942df391..afa70326137 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -24,7 +24,7 @@
 typedef enum CopySource
 {
 	COPY_FILE,					/* from file (or a piped program) */
-	COPY_OLD_FE,				/* from frontend (2.0 protocol) */
+	/* protocol version 2 not supported with COPY FROM */
 	COPY_NEW_FE,				/* from frontend (3.0 protocol) */
 	COPY_CALLBACK				/* from callback function */
 } CopySource;
@@ -71,8 +71,7 @@ typedef struct CopyFromStateData
 	CopySource	copy_src;		/* type of copy source */
 	FILE	   *copy_file;		/* used if copy_src == COPY_FILE */
 	StringInfo	fe_msgbuf;		/* used if copy_src == COPY_NEW_FE */
-	bool		reached_eof;	/* true if we read to end of copy data (not
-								 * all copy_src types maintain this) */
+	bool		reached_eof;	/* true if we read to end of copy data */
 
 	EolType		eol_type;		/* EOL type of input */
 	int			file_encoding;	/* file or remote side's character encoding */
-- 
2.30.0

Reply via email to