Hello,

On Wed, Jan 21, 2026 at 9:50 PM Neil Conway <[email protected]> wrote:

> A few suggestions:
>
> * I'm curious if we'll see better performance on large inputs if we flush
> to `line_buf` periodically (e.g., at least every few thousand bytes or so).
> Otherwise we might see poor data cache behavior if large inputs with no
> control characters get evicted before we've copied them over. See the
> approach taken in escape_json_with_len() in utils/adt/json.c
>
So I gave this a try. Attached is a small patch that has v3 plus the
suggestion added; here are the results with different thresholds for the
line_buf refill:

Execution time compared to master:
Workload v3 v3.1 (2k) v3.1 (4k) v3.1 (8k) v3.1 (16k) v3.1 (20k) v3.1 (28k)
text/none -16.5% -17.4% -14.3% -12.6% -13.6% -10.5% -16.3%
text/esc +5.6% +11.1% +3.1% +7.6% +3.0% +4.9% +4.2%
csv/none -31.0% -29.9% -26.7% -30.1% -27.9% -30.2% -29.6%
csv/quote +0.2% -0.6% -0.4% -1.0% +0.1% +2.5% -1.0%

L1d cache miss rates:
Workload Master v3 v3.1 (2k) v3.1 (4k) v3.1 (8k) v3.1 (16k) v3.1 (20k) v3.1 (28k)
text/none 0.20% 0.23% 0.21% 0.22% 0.21% 0.21% 0.21% 0.22%
text/esc 0.21% 0.22% 0.22% 0.22% 0.22% 0.21% 0.22% 0.22%
csv/none 0.17% 0.22% 0.21% 0.22% 0.21% 0.21% 0.22% 0.22%
csv/quote 0.18% 0.22% 0.19% 0.20% 0.20% 0.19% 0.20% 0.20%
On my laptop I have 32KB of L1 cache per core.
The results are very close: it is hard to see a difference in the cache-miss
numbers, but the execution times tell a different story, and periodically
refilling line_buf seems worthwhile.
If Manni could rerun the benchmarks with these variants too, it would be nice
to confirm this.


Regards,
Ayoub
From 40230259b9c867352094d868f0f34cc6fd418c15 Mon Sep 17 00:00:00 2001
From: AyoubKAZ <[email protected]>
Date: Sat, 31 Jan 2026 16:23:35 +0100
Subject: [PATCH] COPY from SIMD v3 with line_buf periodic refill

---
 src/backend/commands/copyfrom.c          |   6 +
 src/backend/commands/copyfromparse.c     | 162 ++++++++++++++++++++++-
 src/include/commands/copyfrom_internal.h |   7 +
 3 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 25ee20b23db..3ef81201008 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1721,6 +1721,12 @@ BeginCopyFrom(ParseState *pstate,
 	cstate->cur_attval = NULL;
 	cstate->relname_only = false;
 
+	/* Initialize SIMD variables */
+	cstate->simd_continue = false;
+	cstate->simd_initialized = false;
+	cstate->simd_current_sleep_cycle = 0;
+	cstate->simd_last_sleep_cycle = 0;
+
 	/*
 	 * Allocate buffers for the input pipeline.
 	 *
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 5868a7fa11f..3694b814a85 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -71,7 +71,9 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
 #include "port/pg_bswap.h"
+#include "port/simd.h"
 #include "utils/builtins.h"
 #include "utils/rel.h"
 
@@ -135,18 +137,22 @@ if (1) \
 	} \
 } else ((void) 0)
 
+#define LINE_BUF_FLUSH_AFTER 4096
+
 /* NOTE: there's a copy of this in copyto.c */
 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 
 
 /* non-export function prototypes */
 static bool CopyReadLine(CopyFromState cstate, bool is_csv);
-static bool CopyReadLineText(CopyFromState cstate, bool is_csv);
 static int	CopyReadAttributesText(CopyFromState cstate);
 static int	CopyReadAttributesCSV(CopyFromState cstate);
 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
 									 Oid typioparam, int32 typmod,
 									 bool *isnull);
+static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate,
+														bool is_csv,
+														bool simd_continue);
 static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
 															  ExprContext *econtext,
 															  Datum *values,
@@ -1171,8 +1177,23 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
 	resetStringInfo(&cstate->line_buf);
 	cstate->line_buf_valid = false;
 
-	/* Parse data and transfer into line_buf */
-	result = CopyReadLineText(cstate, is_csv);
+	/* If this is the first read, initialize the SIMD state */
+	if (unlikely(!cstate->simd_initialized))
+	{
+		cstate->simd_initialized = true;
+		cstate->simd_continue = true;
+		cstate->simd_current_sleep_cycle = 0;
+		cstate->simd_last_sleep_cycle = 0;
+	}
+
+	/*
+	 * Parse data and transfer into line_buf. To get benefit from inlining,
+	 * call CopyReadLineText() with the constant boolean variables.
+	 */
+	if (cstate->simd_continue)
+		result = CopyReadLineText(cstate, is_csv, true);
+	else
+		result = CopyReadLineText(cstate, is_csv, false);
 
 	if (result)
 	{
@@ -1239,8 +1260,8 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
 /*
  * CopyReadLineText - inner loop of CopyReadLine for text mode
  */
-static bool
-CopyReadLineText(CopyFromState cstate, bool is_csv)
+static pg_attribute_always_inline bool
+CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_continue)
 {
 	char	   *copy_input_buf;
 	int			input_buf_ptr;
@@ -1255,6 +1276,19 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 	char		quotec = '\0';
 	char		escapec = '\0';
 
+#ifndef USE_NO_SIMD
+#define SIMD_SLEEP_MAX 1024
+#define SIMD_ADVANCE_AT_LEAST 5
+	Vector8		nl = vector8_broadcast('\n');
+	Vector8		cr = vector8_broadcast('\r');
+	Vector8		bs = vector8_broadcast('\\');
+	Vector8		quote = vector8_broadcast(0);
+	Vector8		escape = vector8_broadcast(0);
+
+	uint64		simd_total_cycle = 0;
+	uint64		simd_total_advance = 0;
+#endif
+
 	if (is_csv)
 	{
 		quotec = cstate->opts.quote[0];
@@ -1262,6 +1296,12 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 		/* ignore special escape processing if it's the same as quotec */
 		if (quotec == escapec)
 			escapec = '\0';
+
+#ifndef USE_NO_SIMD
+		quote = vector8_broadcast(quotec);
+		if (quotec != escapec)
+			escape = vector8_broadcast(escapec);
+#endif
 	}
 
 	/*
@@ -1328,6 +1368,74 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 			need_data = false;
 		}
 
+#ifndef USE_NO_SIMD
+
+		/*
+		 * Use SIMD instructions to efficiently scan the input buffer for
+		 * special characters (e.g., newline, carriage return, quote, and
+		 * escape). This is faster than byte-by-byte iteration, especially on
+		 * large buffers.
+		 *
+		 * We skip the SIMD fast path when it is currently disabled
+		 * (simd_continue is false), when the previously processed character
+		 * was an escape character (last_was_esc), since the next byte must be
+		 * examined sequentially, or when fewer than sizeof(Vector8) bytes
+		 * remain in the buffer, since SIMD operates on fixed-size chunks.
+		 */
+		if (simd_continue && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
+		{
+			Vector8		chunk;
+			Vector8		match = vector8_broadcast(0);
+			uint32		mask;
+
+			simd_total_cycle++;
+
+			/* Load a chunk of data into a vector register */
+			vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]);
+
+			/* \n and \r are not special inside quotes */
+			if (!in_quote)
+				match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr));
+
+			if (is_csv)
+			{
+				match = vector8_or(match, vector8_eq(chunk, quote));
+				if (escapec != '\0')
+					match = vector8_or(match, vector8_eq(chunk, escape));
+			}
+			else
+				match = vector8_or(match, vector8_eq(chunk, bs));
+
+			/* Check if we found any special characters */
+			mask = vector8_highbit_mask(match);
+			if (mask != 0)
+			{
+				/*
+				 * Found a special character. Advance up to that point and let
+				 * the scalar code handle it.
+				 */
+				int			advance = pg_rightmost_one_pos32(mask);
+
+				input_buf_ptr += advance;
+				simd_total_advance += advance;
+			}
+			else
+			{
+				/* No special characters found, so skip the entire chunk */
+				input_buf_ptr += sizeof(Vector8);
+				simd_total_advance += sizeof(Vector8);
+
+				/* Periodically flush to line_buf to avoid L1D cache misses */
+				if (input_buf_ptr - cstate->input_buf_index >= LINE_BUF_FLUSH_AFTER)
+				{
+					REFILL_LINEBUF;
+				}
+
+				continue;
+			}
+		}
+#endif
+
 		/* OK to fetch a character */
 		prev_raw_ptr = input_buf_ptr;
 		c = copy_input_buf[input_buf_ptr++];
@@ -1530,6 +1638,50 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 		}
 	}							/* end of outer loop */
 
+#ifndef USE_NO_SIMD
+
+	/* SIMD was enabled */
+	if (simd_continue)
+	{
+		/* SIMD is worth it */
+		if (simd_total_cycle && simd_total_advance / simd_total_cycle >= SIMD_ADVANCE_AT_LEAST)
+		{
+			Assert(cstate->simd_current_sleep_cycle == 0);
+			cstate->simd_last_sleep_cycle >>= 1;
+		}
+		/* SIMD was enabled but it isn't worth it */
+		else
+		{
+			uint16		simd_last_sleep_cycle = cstate->simd_last_sleep_cycle;
+
+			cstate->simd_continue = false;
+
+			if (simd_last_sleep_cycle == 0)
+				simd_last_sleep_cycle = 1;
+			else if (simd_last_sleep_cycle >= SIMD_SLEEP_MAX / 2)
+				simd_last_sleep_cycle = SIMD_SLEEP_MAX;
+			else
+				simd_last_sleep_cycle <<= 1;
+			cstate->simd_current_sleep_cycle = simd_last_sleep_cycle;
+			cstate->simd_last_sleep_cycle = simd_last_sleep_cycle;
+		}
+	}
+	/* SIMD was disabled */
+	else
+	{
+		/*
+		 * We should only get here while counting down
+		 * cstate->simd_current_sleep_cycle from a positive value.
+		 */
+		Assert(cstate->simd_current_sleep_cycle != 0);
+		cstate->simd_current_sleep_cycle--;
+
+		if (cstate->simd_current_sleep_cycle == 0)
+			cstate->simd_continue = true;
+	}
+
+#endif
+
 	/*
 	 * Transfer any still-uncopied data to line_buf.
 	 */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 822ef33cf69..0d72e03c999 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -89,6 +89,13 @@ typedef struct CopyFromStateData
 	const char *cur_attval;		/* current att value for error messages */
 	bool		relname_only;	/* don't output line number, att, etc. */
 
+	/* SIMD variables */
+	bool		simd_continue;
+	bool		simd_initialized;
+	uint16		simd_last_sleep_cycle;
+	uint16		simd_current_sleep_cycle;
+
+
 	/*
 	 * Working state
 	 */
-- 
2.34.1

Reply via email to