While analyzing a customer's performance problem, I noticed that
the performance of pg_dump for large arrays is terrible.

As a test case, I created a table with 10000 rows, each of which
had an array of 10000 uuids.  The table resided in shared buffers.

The following took 24.5 seconds:

  COPY mytab TO '/dev/null';

Most of the time was spent in array_out and uuid_out.

I tried binary copy, which took 4.4 seconds:

  COPY mytab TO '/dev/null' (FORMAT 'binary');

Here, a lot of time was spent in pq_begintypsend.


So I looked for low-hanging fruit, and the result is the attached
patch series.

- Patch 0001 speeds up pq_begintypsend with a custom macro.
  That brought the binary copy down to 3.7 seconds, which is a
  speed gain of 15%.

- Patch 0002 speeds up uuid_out by avoiding the overhead of
  a StringInfo.  This brings text mode COPY to 19.4 seconds,
  which is a speed gain of 21%.

- Patch 0003 speeds up array_out a bit by avoiding some zero
  byte writes.  The measured speed gain is under 2%.

Yours,
Laurenz Albe
From eef8fb5d5a567a1731d8eb6ae24f32a9a0879028 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Sat, 17 Feb 2024 17:12:40 +0100
Subject: [PATCH v1 1/3] Speed up binary COPY TO

Performance analysis shows that a lot of time is spent
in pq_begintypsend, so speeding that up will boost the
performance of binary COPY TO considerably.

Invent a new macro to initialize a StringInfo and fill
the first four bytes with zeros.  This macro is only used
in pq_begintypsend, but we had better keep implementation
details of StringInfo in stringinfo.h.
---
 src/backend/libpq/pqformat.c |  6 +-----
 src/include/lib/stringinfo.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/backend/libpq/pqformat.c b/src/backend/libpq/pqformat.c
index a697ccfbbf..255dd4e5e3 100644
--- a/src/backend/libpq/pqformat.c
+++ b/src/backend/libpq/pqformat.c
@@ -328,12 +328,8 @@ pq_endmessage_reuse(StringInfo buf)
 void
 pq_begintypsend(StringInfo buf)
 {
-	initStringInfo(buf);
 	/* Reserve four bytes for the bytea length word */
-	appendStringInfoCharMacro(buf, '\0');
-	appendStringInfoCharMacro(buf, '\0');
-	appendStringInfoCharMacro(buf, '\0');
-	appendStringInfoCharMacro(buf, '\0');
+	initStringInfoWith4Zeros(buf);
 }
 
 /* --------------------------------
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index 2cd636b01c..db38b6d3f7 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -112,6 +112,20 @@ extern StringInfo makeStringInfo(void);
  */
 extern void initStringInfo(StringInfo str);
 
+/*------------------------
+ * initStringInfoWith4Zeros
+ * As above, plus four leading zero bytes; 'str' is evaluated more than once.
+ */
+#define initStringInfoWith4Zeros(str) \
+	do { \
+		initStringInfo(str); \
+		(str)->data[0] = '\0'; \
+		(str)->data[1] = '\0'; \
+		(str)->data[2] = '\0'; \
+		(str)->data[3] = '\0'; \
+		(str)->len = 4; \
+	} while (0)
+
 /*------------------------
  * initReadOnlyStringInfo
  * Initialize a StringInfoData struct from an existing string without copying
-- 
2.43.2

From de87d249e3f55c38062e563821af9fa32d15e341 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Sat, 17 Feb 2024 17:23:39 +0100
Subject: [PATCH v1 2/3] Speed up uuid_out

Since the size of the string representation of a uuid is
fixed, there is no benefit in using a StringInfo.
Avoiding the overhead makes the function substantially faster.
---
 src/backend/utils/adt/uuid.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 73dfd711c7..b48bbf01e1 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -53,10 +53,11 @@ uuid_out(PG_FUNCTION_ARGS)
 {
 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
 	static const char hex_chars[] = "0123456789abcdef";
-	StringInfoData buf;
+	char	   *buf, *p;
 	int			i;
 
-	initStringInfo(&buf);
+	buf = palloc(2 * UUID_LEN + 5);
+	p = buf;
 	for (i = 0; i < UUID_LEN; i++)
 	{
 		int			hi;
@@ -68,16 +69,17 @@ uuid_out(PG_FUNCTION_ARGS)
 		 * ("-"). Therefore, add the hyphens at the appropriate places here.
 		 */
 		if (i == 4 || i == 6 || i == 8 || i == 10)
-			appendStringInfoChar(&buf, '-');
+			*p++ = '-';
 
 		hi = uuid->data[i] >> 4;
 		lo = uuid->data[i] & 0x0F;
 
-		appendStringInfoChar(&buf, hex_chars[hi]);
-		appendStringInfoChar(&buf, hex_chars[lo]);
+		*p++ = hex_chars[hi];
+		*p++ = hex_chars[lo];
 	}
+	*p = '\0';
 
-	PG_RETURN_CSTRING(buf.data);
+	PG_RETURN_CSTRING(buf);
 }
 
 /*
-- 
2.43.2

From e8346ea88785a763d2bd3f99800ae928b7469f64 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Sat, 17 Feb 2024 17:24:19 +0100
Subject: [PATCH v1 3/3] Small speedup for array_out

Avoid writing zero bytes where it is not necessary.
This offers only a small, but measurable speed gain
for larger arrays.
---
 src/backend/utils/adt/arrayfuncs.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index f3fee54e37..306c5062f7 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -1200,7 +1200,7 @@ array_out(PG_FUNCTION_ARGS)
 	p = retval;
 
 #define APPENDSTR(str)	(strcpy(p, (str)), p += strlen(p))
-#define APPENDCHAR(ch)	(*p++ = (ch), *p = '\0')
+#define APPENDCHAR(ch)	(*p++ = (ch))
 
 	if (needdims)
 		APPENDSTR(dims_str);
@@ -1222,10 +1222,9 @@ array_out(PG_FUNCTION_ARGS)
 				char		ch = *tmp;
 
 				if (ch == '"' || ch == '\\')
-					*p++ = '\\';
-				*p++ = ch;
+					APPENDCHAR('\\');
+				APPENDCHAR(ch);
 			}
-			*p = '\0';
 			APPENDCHAR('"');
 		}
 		else
@@ -1248,6 +1247,8 @@ array_out(PG_FUNCTION_ARGS)
 		j = i;
 	} while (j != -1);
 
+	*p = '\0';
+
 #undef APPENDSTR
 #undef APPENDCHAR
 
-- 
2.43.2

Reply via email to