On Tue, Aug 15, 2023 at 01:53:25PM +0700, John Naylor wrote:
> If we're not going to have a general SQL conversion function, here are some
> comments on the current patch.
I appreciate the review.
> +static char *convert_to_base(uint64 value, int base);
>
> Not needed if the definition is above the callers.
Done.
> + * Workhorse for to_binary, to_oct, and to_hex. Note that base must be
> either
> + * 2, 8, or 16.
>
> Why wouldn't it work with any base <= 16?
You're right. I changed this in v5.
> - *ptr = '\0';
> + Assert(base == 2 || base == 8 || base == 16);
>
> + *ptr = '\0';
>
> Spurious whitespace change?
I think this might just be a weird artifact of the diff algorithm.
> - char buf[32]; /* bigger than needed, but reasonable */
> + char *buf = palloc(sizeof(uint64) * BITS_PER_BYTE + 1);
>
> Why is this no longer allocated on the stack? Maybe needs a comment about
> the size calculation.
It really should be. IIRC I wanted to avoid passing a pre-allocated buffer
to convert_to_base(), but I don't remember why. I fixed this in v5.
> +static char *
> +convert_to_base(uint64 value, int base)
>
> On my machine this now requires a function call and a DIV instruction, even
> though the patch claims not to support anything but a power of two. It's
> tiny enough to declare it inline so the compiler can specialize for each
> call site.
This was on my list of things to check before committing. I assumed that
it would be automatically inlined, but given your analysis, I went ahead
and added the inline keyword. My compiler took the hint and inlined the
function, and it used SHR instead of DIV instructions. The machine code
for to_hex32/64 is still a couple of instructions longer than before
(probably because of the uint64 casts), but I don't know if we need to
worry about micro-optimizing these functions any further.
> +{ oid => '5101', descr => 'convert int4 number to binary',
>
> Needs to be over 8000.
Done.
--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 2ad83f8781a013ef5e5e5a62a422f3b73c6c5f2d Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Tue, 25 Jul 2023 16:09:01 -0700
Subject: [PATCH v5 1/1] add to_binary() and to_oct()
---
doc/src/sgml/func.sgml | 42 +++++++++++
src/backend/utils/adt/varlena.c | 103 +++++++++++++++++++-------
src/include/catalog/pg_proc.dat | 12 +++
src/test/regress/expected/strings.out | 26 ++++++-
src/test/regress/sql/strings.sql | 9 ++-
5 files changed, 163 insertions(+), 29 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index be2f54c914..23b5ac7457 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3737,6 +3737,48 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>to_binary</primary>
+ </indexterm>
+ <function>to_binary</function> ( <type>integer</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para role="func_signature">
+ <function>to_binary</function> ( <type>bigint</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para>
+ Converts the number to its equivalent binary representation.
+ </para>
+ <para>
+ <literal>to_binary(2147483647)</literal>
+ <returnvalue>1111111111111111111111111111111</returnvalue>
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>to_oct</primary>
+ </indexterm>
+ <function>to_oct</function> ( <type>integer</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para role="func_signature">
+ <function>to_oct</function> ( <type>bigint</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para>
+ Converts the number to its equivalent octal representation.
+ </para>
+ <para>
+ <literal>to_oct(2147483647)</literal>
+ <returnvalue>17777777777</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index b1ec5c32ce..b955e54611 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4919,54 +4919,105 @@ array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
return result;
}
-#define HEXBASE 16
/*
- * Convert an int32 to a string containing a base 16 (hex) representation of
- * the number.
+ * We size the buffer for to_binary's longest possible return value, including
+ * the terminating '\0'.
*/
-Datum
-to_hex32(PG_FUNCTION_ARGS)
+#define CONVERT_TO_BASE_BUFSIZE (sizeof(uint64) * BITS_PER_BYTE + 1)
+
+/*
+ * Workhorse for to_binary, to_oct, and to_hex. Note that base must be > 1 and
+ * <= 16. buf must be at least CONVERT_TO_BASE_BUFSIZE bytes.
+ */
+static inline char *
+convert_to_base(uint64 value, int base, char *buf)
{
- uint32 value = (uint32) PG_GETARG_INT32(0);
- char *ptr;
const char *digits = "0123456789abcdef";
- char buf[32]; /* bigger than needed, but reasonable */
+ char *ptr = buf + CONVERT_TO_BASE_BUFSIZE - 1;
- ptr = buf + sizeof(buf) - 1;
- *ptr = '\0';
+ Assert(base > 1);
+ Assert(base <= 16);
+ *ptr = '\0';
do
{
- *--ptr = digits[value % HEXBASE];
- value /= HEXBASE;
+ *--ptr = digits[value % base];
+ value /= base;
} while (ptr > buf && value);
- PG_RETURN_TEXT_P(cstring_to_text(ptr));
+ return ptr;
+}
+
+/*
+ * Convert an integer to a string containing a base-2 (binary) representation
+ * of the number.
+ */
+Datum
+to_binary32(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT32(0);
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 2, buf);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+Datum
+to_binary64(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT64(0);
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 2, buf);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
}
/*
- * Convert an int64 to a string containing a base 16 (hex) representation of
+ * Convert an integer to a string containing a base-8 (oct) representation of
* the number.
*/
Datum
-to_hex64(PG_FUNCTION_ARGS)
+to_oct32(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT32(0);
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 8, buf);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+Datum
+to_oct64(PG_FUNCTION_ARGS)
{
uint64 value = (uint64) PG_GETARG_INT64(0);
- char *ptr;
- const char *digits = "0123456789abcdef";
- char buf[32]; /* bigger than needed, but reasonable */
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 8, buf);
- ptr = buf + sizeof(buf) - 1;
- *ptr = '\0';
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
- do
- {
- *--ptr = digits[value % HEXBASE];
- value /= HEXBASE;
- } while (ptr > buf && value);
+/*
+ * Convert an integer to a string containing a base-16 (hex) representation of
+ * the number.
+ */
+Datum
+to_hex32(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT32(0);
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 16, buf);
- PG_RETURN_TEXT_P(cstring_to_text(ptr));
+ PG_RETURN_TEXT_P(cstring_to_text(result));
}
+Datum
+to_hex64(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT64(0);
+ char buf[CONVERT_TO_BASE_BUFSIZE];
+ char *result = convert_to_base(value, 16, buf);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+#undef CONVERT_TO_BASE_BUFSIZE
/*
* Return the size of a datum, possibly compressed
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 6996073989..e046d91c38 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3707,6 +3707,18 @@
{ oid => '2768', descr => 'split string by pattern',
proname => 'regexp_split_to_array', prorettype => '_text',
proargtypes => 'text text text', prosrc => 'regexp_split_to_array' },
+{ oid => '9030', descr => 'convert int4 number to binary',
+ proname => 'to_binary', prorettype => 'text', proargtypes => 'int4',
+ prosrc => 'to_binary32' },
+{ oid => '9031', descr => 'convert int8 number to binary',
+ proname => 'to_binary', prorettype => 'text', proargtypes => 'int8',
+ prosrc => 'to_binary64' },
+{ oid => '9032', descr => 'convert int4 number to oct',
+ proname => 'to_oct', prorettype => 'text', proargtypes => 'int4',
+ prosrc => 'to_oct32' },
+{ oid => '9033', descr => 'convert int8 number to oct',
+ proname => 'to_oct', prorettype => 'text', proargtypes => 'int8',
+ prosrc => 'to_oct64' },
{ oid => '2089', descr => 'convert int4 number to hex',
proname => 'to_hex', prorettype => 'text', proargtypes => 'int4',
prosrc => 'to_hex32' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 62698569e1..28c81ea6e4 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2129,8 +2129,32 @@ select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
(1 row)
--
--- test to_hex
+-- test to_binary, to_oct, and to_hex
--
+select to_binary(256*256*256 - 1) AS "111111111111111111111111";
+ 111111111111111111111111
+--------------------------
+ 111111111111111111111111
+(1 row)
+
+select to_binary(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "11111111111111111111111111111111";
+ 11111111111111111111111111111111
+----------------------------------
+ 11111111111111111111111111111111
+(1 row)
+
+select to_oct(256*256*256 - 1) AS "77777777";
+ 77777777
+----------
+ 77777777
+(1 row)
+
+select to_oct(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "37777777777";
+ 37777777777
+-------------
+ 37777777777
+(1 row)
+
select to_hex(256*256*256 - 1) AS "ffffff";
ffffff
--------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ca32f6bba5..f7e35bfb41 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -685,10 +685,15 @@ select split_part('joeuser@mydatabase','@',-3) AS "empty string";
select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
--
--- test to_hex
+-- test to_binary, to_oct, and to_hex
--
-select to_hex(256*256*256 - 1) AS "ffffff";
+select to_binary(256*256*256 - 1) AS "111111111111111111111111";
+select to_binary(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "11111111111111111111111111111111";
+
+select to_oct(256*256*256 - 1) AS "77777777";
+select to_oct(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "37777777777";
+select to_hex(256*256*256 - 1) AS "ffffff";
select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
--
--
2.25.1