From 185072d5c64f6d252db1d3440f22dcca7ba2b424 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@timescale.com>
Date: Thu, 18 Jul 2024 12:59:40 +0300
Subject: [PATCH v1] Add crc32(text) & crc32(bytea)

Per several user requests.

Aleksander Alekseev, reviewed by TODO FIXME
Discussion: TODO FIXME
---
 doc/src/sgml/func.sgml                   | 36 ++++++++++
 src/backend/utils/adt/Makefile           |  1 +
 src/backend/utils/adt/hashfuncs.c        | 90 ++++++++++++++++++++++++
 src/backend/utils/adt/meson.build        |  1 +
 src/include/catalog/pg_proc.dat          |  8 +++
 src/test/regress/expected/opr_sanity.out |  2 +
 src/test/regress/expected/strings.out    | 27 +++++++
 src/test/regress/sql/strings.sql         | 10 +++
 8 files changed, 175 insertions(+)
 create mode 100644 src/backend/utils/adt/hashfuncs.c

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3f93c61aa3..7943b1ee2a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3030,6 +3030,24 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>crc32</primary>
+        </indexterm>
+        <function>crc32</function> ( <type>text</type> )
+        <returnvalue>text</returnvalue>
+       </para>
+       <para>
+        Computes the CRC32 <link linkend="functions-hash-note">hash</link> of
+        the argument, with the result written in hexadecimal.
+       </para>
+       <para>
+        <literal>crc32('PostgreSQL')</literal>
+        <returnvalue>cb97b83b</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4484,6 +4502,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>crc32</primary>
+        </indexterm>
+        <function>crc32</function> ( <type>bytea</type> )
+        <returnvalue>text</returnvalue>
+       </para>
+       <para>
+        Computes the CRC32 <link linkend="functions-hash-note">hash</link> of
+        the binary string, with the result written in hexadecimal.
+       </para>
+       <para>
+        <literal>crc32('PostgreSQL' :: bytea)</literal>
+        <returnvalue>cb97b83b</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index edb09d4e35..be84d68856 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -42,6 +42,7 @@ OBJS = \
 	geo_ops.o \
 	geo_selfuncs.o \
 	geo_spgist.o \
+	hashfuncs.o \
 	hbafuncs.o \
 	inet_cidr_ntop.o \
 	inet_net_pton.o \
diff --git a/src/backend/utils/adt/hashfuncs.c b/src/backend/utils/adt/hashfuncs.c
new file mode 100644
index 0000000000..13a5fe55fc
--- /dev/null
+++ b/src/backend/utils/adt/hashfuncs.c
@@ -0,0 +1,90 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfuncs.c
+ *	  Non-cryptographic hash functions
+ *
+ * Portions Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/hashfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "utils/pg_crc.h"
+#include "varatt.h"
+
+/*
+ * Compute the traditional CRC-32 of the "size" bytes starting at "buf".
+ *
+ * Common code for crc32_text() and crc32_bytea().
+ *
+ * "size" is a size_t because both callers pass a length held in a size_t
+ * variable; taking an int would narrow that value implicitly.
+ *
+ * The whole buffer is handed to COMP_TRADITIONAL_CRC32() in a single call
+ * rather than being copied and fed to it one byte at a time: the macro
+ * already takes a length argument, so the per-byte loop added nothing.
+ */
+static unsigned int
+crc32_sz(const char *buf, size_t size)
+{
+	pg_crc32	crc;
+
+	INIT_TRADITIONAL_CRC32(crc);
+	COMP_TRADITIONAL_CRC32(crc, buf, size);
+	FIN_TRADITIONAL_CRC32(crc);
+
+	return (unsigned int) crc;
+}
+
+/*
+ * crc32_text
+ *		SQL-callable crc32(text): checksum the argument's data bytes with
+ *		crc32_sz() and return the value as a text datum of zero-padded,
+ *		lower-case hexadecimal digits.
+ */
+Datum
+crc32_text(PG_FUNCTION_ARGS)
+{
+	text	   *arg = PG_GETARG_TEXT_PP(0);
+	size_t		nbytes = VARSIZE_ANY_EXHDR(arg);
+	unsigned int crc;
+	char		hexbuf[16];
+
+	/* checksum the data area that the varlena accessors point at */
+	crc = crc32_sz(VARDATA_ANY(arg), nbytes);
+
+	/* "%08x" pads to at least eight digits, which hexbuf holds easily */
+	snprintf(hexbuf, sizeof(hexbuf), "%08x", crc);
+
+	/* turn the C string into a text value and return it */
+	PG_RETURN_TEXT_P(cstring_to_text(hexbuf));
+}
+
+/*
+ * SQL function crc32(bytea): run crc32_sz() over the data bytes of the
+ * argument, render the checksum with "%08x", and return that string as
+ * text.
+ */
+Datum
+crc32_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_PP(0);
+	char		digits[16];
+	unsigned int checksum;
+	size_t		datalen;
+
+	/* length and start of the data come from the varlena accessors */
+	datalen = VARSIZE_ANY_EXHDR(data);
+	checksum = crc32_sz(VARDATA_ANY(data), datalen);
+
+	/* render as hexadecimal, zero-padded to at least eight digits */
+	snprintf(digits, sizeof(digits), "%08x", checksum);
+
+	/* wrap the C string in a text datum for the caller */
+	PG_RETURN_TEXT_P(cstring_to_text(digits));
+}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 8c6fc80c37..6239ea7b42 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -31,6 +31,7 @@ backend_sources += files(
   'geo_ops.c',
   'geo_selfuncs.c',
   'geo_spgist.c',
+  'hashfuncs.c',
   'hbafuncs.c',
   'inet_cidr_ntop.c',
   'inet_net_pton.c',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 73d9cf8582..0f79998bba 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7704,6 +7704,14 @@
   proname => 'system', provolatile => 'v', prorettype => 'tsm_handler',
   proargtypes => 'internal', prosrc => 'tsm_system_handler' },
 
+# non-cryptographic
+{ oid => '8571', descr => 'CRC32 hash',
+  proname => 'crc32', proleakproof => 't', prorettype => 'text',
+  proargtypes => 'text', prosrc => 'crc32_text' },
+{ oid => '8572', descr => 'CRC32 hash',
+  proname => 'crc32', proleakproof => 't', prorettype => 'text',
+  proargtypes => 'bytea', prosrc => 'crc32_bytea' },
+
 # cryptographic
 { oid => '2311', descr => 'MD5 hash',
   proname => 'md5', proleakproof => 't', prorettype => 'text',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..e170a9bb1b 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
 uuid_extract_timestamp(uuid)
 uuid_extract_version(uuid)
+crc32(text)
+crc32(bytea)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 52b69a107f..de77070038 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2203,6 +2203,33 @@ select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"
  ffffffff
 (1 row)
 
+--
+-- CRC32
+--
+SELECT crc32('');
+  crc32   
+----------
+ 00000000
+(1 row)
+
+SELECT crc32('The quick brown fox jumps over the lazy dog.');
+  crc32   
+----------
+ 519025e9
+(1 row)
+
+SELECT crc32('' :: bytea);
+  crc32   
+----------
+ 00000000
+(1 row)
+
+SELECT crc32('The quick brown fox jumps over the lazy dog.' :: bytea);
+  crc32   
+----------
+ 519025e9
+(1 row)
+
 --
 -- SHA-2
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3959678992..93149d170a 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -702,6 +702,16 @@ select to_hex(-1234::bigint) AS "fffffffffffffb2e";
 select to_hex(256*256*256 - 1) AS "ffffff";
 select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
 
+--
+-- CRC32
+--
+
+SELECT crc32('');
+SELECT crc32('The quick brown fox jumps over the lazy dog.');
+
+SELECT crc32('' :: bytea);
+SELECT crc32('The quick brown fox jumps over the lazy dog.' :: bytea);
+
 --
 -- SHA-2
 --
-- 
2.45.2

