On Sat, Jan 11, 2025 at 3:46 AM Nathan Bossart <[email protected]> wrote:
>
> I was able to get auto-vectorization to take effect on Apple clang 16 with
> the following addition to src/backend/utils/adt/Makefile:
>
> encode.o: CFLAGS += ${CFLAGS_VECTORIZE} -mllvm -force-vector-width=8
>
> This gave the following results with your hex_encode_test() function:
>
> buf | HEAD | patch | % diff
> -------+-------+-------+--------
> 16 | 21 | 16 | 24
> 64 | 54 | 41 | 24
> 256 | 138 | 100 | 28
> 1024 | 441 | 300 | 32
> 4096 | 1671 | 1106 | 34
> 16384 | 6890 | 4570 | 34
> 65536 | 27393 | 18054 | 34
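We can do about as well simply by changing the per-nibble lookup to a
per-byte lookup (a 512-byte table holding both hex digits for each
possible byte value, so each input byte needs a single lookup), which
works on every compiler and architecture: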
select hex_encode_test(1000000, 1024);
master:
Time: 1158.700 ms
v2:
Time: 777.443 ms
If we need to do much better than this, it seems better to send the
data to the client as binary, if possible.
--
John Naylor
Amazon Web Services
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4a6fcb56cd..8b059bc834 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -145,7 +145,7 @@ binary_decode(PG_FUNCTION_ARGS)
* HEX
*/
-static const char hextbl[] = "0123456789abcdef";
+static const char hextbl[512] = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -165,9 +165,8 @@ hex_encode(const char *src, size_t len, char *dst)
while (src < end)
{
- *dst++ = hextbl[(*src >> 4) & 0xF];
- *dst++ = hextbl[*src & 0xF];
- src++;
+ memcpy(dst, &hextbl[(* ((unsigned char *) src)) * 2], 2);
+ src++; dst+=2;
}
return (uint64) len * 2;
}