While analyzing a customer's performance problem, I noticed that the performance of pg_dump for large arrays is terrible.
As a test case, I created a table with 10000 rows, each of which had an array of 10000 uuids. The table resided in shared buffers. The following took 24.5 seconds: COPY mytab TO '/dev/null'; Most of the time was spent in array_out and uuid_out. I tried binary copy, which took 4.4 seconds: COPY mytab TO '/dev/null' (FORMAT 'binary'); Here, a lot of time was spent in pq_begintypsend. So I looked for low-hanging fruit, and the result is the attached patch series. - Patch 0001 speeds up pq_begintypsend with a custom macro. That brought the binary copy down to 3.7 seconds, which is a speed gain of 15%. - Patch 0002 speeds up uuid_out by avoiding the overhead of a StringInfo. This brings text mode COPY to 19.4 seconds, which is a speed gain of 21%. - Patch 0003 speeds up array_out a bit by avoiding some zero byte writes. The measured speed gain is under 2%. Yours, Laurenz Albe
From eef8fb5d5a567a1731d8eb6ae24f32a9a0879028 Mon Sep 17 00:00:00 2001 From: Laurenz Albe <laurenz.a...@cybertec.at> Date: Sat, 17 Feb 2024 17:12:40 +0100 Subject: [PATCH v1 1/3] Speed up binary COPY TO Performance analysis shows that a lot of time is spent in pq_begintypsend, so speeding that up will boost the performance of binary COPY TO considerably. Invent a new macro to initialize a StringInfo and fill the first four bytes with zeros. This macro is only used in pq_begintypsend, but we had better keep implementation details of StringInfo in stringinfo.h. --- src/backend/libpq/pqformat.c | 6 +----- src/include/lib/stringinfo.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/backend/libpq/pqformat.c b/src/backend/libpq/pqformat.c index a697ccfbbf..255dd4e5e3 100644 --- a/src/backend/libpq/pqformat.c +++ b/src/backend/libpq/pqformat.c @@ -328,12 +328,8 @@ pq_endmessage_reuse(StringInfo buf) void pq_begintypsend(StringInfo buf) { - initStringInfo(buf); /* Reserve four bytes for the bytea length word */ - appendStringInfoCharMacro(buf, '\0'); - appendStringInfoCharMacro(buf, '\0'); - appendStringInfoCharMacro(buf, '\0'); - appendStringInfoCharMacro(buf, '\0'); + initStringInfoWith4Zeros(buf); } /* -------------------------------- diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h index 2cd636b01c..db38b6d3f7 100644 --- a/src/include/lib/stringinfo.h +++ b/src/include/lib/stringinfo.h @@ -112,6 +112,20 @@ extern StringInfo makeStringInfo(void); */ extern void initStringInfo(StringInfo str); +/*------------------------ + * initStringInfoWith4Zeros + * As above, but append four zero bytes. 
+ */ +#define initStringInfoWith4Zeros(str) \ + { \ + initStringInfo(str); \ + (str)->data[0] = '\0'; \ + (str)->data[1] = '\0'; \ + (str)->data[2] = '\0'; \ + (str)->data[3] = '\0'; \ + (str)->len = 4; \ + } + /*------------------------ * initReadOnlyStringInfo * Initialize a StringInfoData struct from an existing string without copying -- 2.43.2
From de87d249e3f55c38062e563821af9fa32d15e341 Mon Sep 17 00:00:00 2001 From: Laurenz Albe <laurenz.a...@cybertec.at> Date: Sat, 17 Feb 2024 17:23:39 +0100 Subject: [PATCH v1 2/3] Speed up uuid_out Since the size of the string representation of an uuid is fixed, there is no benefit in using a StringInfo. Avoiding the overhead makes the function substantially faster. --- src/backend/utils/adt/uuid.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index 73dfd711c7..b48bbf01e1 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -53,10 +53,11 @@ uuid_out(PG_FUNCTION_ARGS) { pg_uuid_t *uuid = PG_GETARG_UUID_P(0); static const char hex_chars[] = "0123456789abcdef"; - StringInfoData buf; + char *buf, *p; int i; - initStringInfo(&buf); + buf = palloc(2 * UUID_LEN + 5); + p = buf; for (i = 0; i < UUID_LEN; i++) { int hi; @@ -68,16 +69,17 @@ uuid_out(PG_FUNCTION_ARGS) * ("-"). Therefore, add the hyphens at the appropriate places here. */ if (i == 4 || i == 6 || i == 8 || i == 10) - appendStringInfoChar(&buf, '-'); + *p++ = '-'; hi = uuid->data[i] >> 4; lo = uuid->data[i] & 0x0F; - appendStringInfoChar(&buf, hex_chars[hi]); - appendStringInfoChar(&buf, hex_chars[lo]); + *p++ = hex_chars[hi]; + *p++ = hex_chars[lo]; } + *p = '\0'; - PG_RETURN_CSTRING(buf.data); + PG_RETURN_CSTRING(buf); } /* -- 2.43.2
From e8346ea88785a763d2bd3f99800ae928b7469f64 Mon Sep 17 00:00:00 2001 From: Laurenz Albe <laurenz.a...@cybertec.at> Date: Sat, 17 Feb 2024 17:24:19 +0100 Subject: [PATCH v1 3/3] Small speedup for array_out Avoid writing zero bytes where it is not necessary. This offers only a small, but measurable speed gain for larger arrays. --- src/backend/utils/adt/arrayfuncs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index f3fee54e37..306c5062f7 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -1200,7 +1200,7 @@ array_out(PG_FUNCTION_ARGS) p = retval; #define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p)) -#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0') +#define APPENDCHAR(ch) (*p++ = (ch)) if (needdims) APPENDSTR(dims_str); @@ -1222,10 +1222,9 @@ array_out(PG_FUNCTION_ARGS) char ch = *tmp; if (ch == '"' || ch == '\\') - *p++ = '\\'; - *p++ = ch; + APPENDCHAR('\\'); + APPENDCHAR(ch); } - *p = '\0'; APPENDCHAR('"'); } else @@ -1248,6 +1247,8 @@ array_out(PG_FUNCTION_ARGS) j = i; } while (j != -1); + *p = '\0'; + #undef APPENDSTR #undef APPENDCHAR -- 2.43.2