From ef0e928dbc408f96cc0d23c32095fb102222562e Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@timescale.com>
Date: Thu, 18 Jul 2024 12:59:40 +0300
Subject: [PATCH v2] Add crc32(bytea) & crc32c(bytea)

Per user requests.

Aleksander Alekseev, reviewed by Nathan Bossart
Discussion: https://postgr.es/m/CAJ7c6TNMTGnqnG=yXXUQh9E88JDckmR45H2Q+=ucaCLMOW1QQw@mail.gmail.com
---
 doc/src/sgml/func.sgml                   |  34 ++++++++
 src/backend/utils/adt/Makefile           |   1 +
 src/backend/utils/adt/hashfuncs.c        | 100 +++++++++++++++++++++++
 src/backend/utils/adt/meson.build        |   1 +
 src/include/catalog/pg_proc.dat          |   8 ++
 src/test/regress/expected/opr_sanity.out |   2 +
 src/test/regress/expected/strings.out    |  27 ++++++
 src/test/regress/sql/strings.sql         |   9 ++
 8 files changed, 182 insertions(+)
 create mode 100644 src/backend/utils/adt/hashfuncs.c

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b39f97dc8d..a327674f65 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4490,6 +4490,40 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>crc32</primary>
+        </indexterm>
+        <function>crc32</function> ( <type>bytea</type> )
+        <returnvalue>bytea</returnvalue>
+       </para>
+       <para>
+        Computes the CRC32 value of the binary string.
+       </para>
+       <para>
+        <literal>crc32('PostgreSQL'::bytea)</literal>
+        <returnvalue>\xcb97b83b</returnvalue>
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>crc32c</primary>
+        </indexterm>
+        <function>crc32c</function> ( <type>bytea</type> )
+        <returnvalue>bytea</returnvalue>
+       </para>
+       <para>
+        Computes the CRC32C value of the binary string.
+       </para>
+       <para>
+        <literal>crc32c('PostgreSQL'::bytea)</literal>
+        <returnvalue>\xa7c16abd</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index edb09d4e35..be84d68856 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -42,6 +42,7 @@ OBJS = \
 	geo_ops.o \
 	geo_selfuncs.o \
 	geo_spgist.o \
+	hashfuncs.o \
 	hbafuncs.o \
 	inet_cidr_ntop.o \
 	inet_net_pton.o \
diff --git a/src/backend/utils/adt/hashfuncs.c b/src/backend/utils/adt/hashfuncs.c
new file mode 100644
index 0000000000..7554d04ca4
--- /dev/null
+++ b/src/backend/utils/adt/hashfuncs.c
@@ -0,0 +1,100 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfuncs.c
+ *	  Non-cryptographic hash functions
+ *
+ * Portions Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/hashfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "port/pg_crc32c.h"
+#include "utils/builtins.h"
+#include "utils/pg_crc.h"
+#include "varatt.h"
+
+/*
+ * Calculate the traditional CRC-32 of "size" bytes starting at "buf".
+ *
+ * The whole buffer is passed to COMP_TRADITIONAL_CRC32() in a single
+ * call, the same way crc32c_bytea() drives COMP_CRC32C(), instead of
+ * copying and feeding one byte at a time.  "size" is a size_t so that it
+ * matches the length variable the caller passes in.
+ *
+ * An empty buffer gives 0; see the crc32(''::bytea) regression test.
+ */
+static inline pg_crc32
+crc32_sz(const char *buf, size_t size)
+{
+	pg_crc32	crc;
+
+	INIT_TRADITIONAL_CRC32(crc);
+	COMP_TRADITIONAL_CRC32(crc, buf, size);
+	FIN_TRADITIONAL_CRC32(crc);
+
+	return crc;
+}
+
+/*
+ * Wrap a 32-bit CRC in a freshly allocated bytea of sizeof(uint32) bytes.
+ *
+ * Shared by crc32_bytea() and crc32c_bytea().
+ */
+static inline bytea *
+crc_to_bytea(uint32 crc)
+{
+	const size_t total = VARHDRSZ + sizeof(uint32);
+	bytea	   *out = palloc0(total);
+
+	/* the pg_hton32() result goes straight into the payload area */
+	SET_VARSIZE(out, total);
+	*(uint32 *) VARDATA(out) = pg_hton32(crc);
+	return out;
+}
+
+/*
+ * SQL-callable crc32(bytea): CRC-32 of the argument, returned as bytea.
+ */
+Datum
+crc32_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *arg = PG_GETARG_BYTEA_PP(0);
+	const char *payload;
+	size_t		payload_len;
+	uint32		checksum;
+
+	/* the _ANY accessors give the data pointer and the payload length */
+	payload = VARDATA_ANY(arg);
+	payload_len = VARSIZE_ANY_EXHDR(arg);
+
+	checksum = (uint32) crc32_sz(payload, payload_len);
+
+	PG_RETURN_BYTEA_P(crc_to_bytea(checksum));
+}
+
+/*
+ * SQL-callable crc32c(bytea), per its pg_proc.dat entry.
+ *
+ * Computes the CRC-32C of the argument's payload and returns it as bytea.
+ */
+Datum
+crc32c_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *arg = PG_GETARG_BYTEA_PP(0);
+	const char *payload = VARDATA_ANY(arg);
+	size_t		payload_len = VARSIZE_ANY_EXHDR(arg);
+	pg_crc32c	checksum;
+
+	/* initialize, feed the whole payload in one call, then finalize */
+	INIT_CRC32C(checksum);
+	COMP_CRC32C(checksum, payload, payload_len);
+	FIN_CRC32C(checksum);
+
+	/* the shared helper turns the 32-bit value into the bytea result */
+	PG_RETURN_BYTEA_P(crc_to_bytea((uint32) checksum));
+}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 8c6fc80c37..6239ea7b42 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -31,6 +31,7 @@ backend_sources += files(
   'geo_ops.c',
   'geo_selfuncs.c',
   'geo_spgist.c',
+  'hashfuncs.c',
   'hbafuncs.c',
   'inet_cidr_ntop.c',
   'inet_net_pton.c',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 06b2f4ba66..1349a696ce 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7731,6 +7731,14 @@
   proname => 'system', provolatile => 'v', prorettype => 'tsm_handler',
   proargtypes => 'internal', prosrc => 'tsm_system_handler' },
 
+# non-cryptographic
+{ oid => '8571', descr => 'CRC32 value',
+  proname => 'crc32', proleakproof => 't', prorettype => 'bytea',
+  proargtypes => 'bytea', prosrc => 'crc32_bytea' },
+{ oid => '8572', descr => 'CRC32C value',
+  proname => 'crc32c', proleakproof => 't', prorettype => 'bytea',
+  proargtypes => 'bytea', prosrc => 'crc32c_bytea' },
+
 # cryptographic
 { oid => '2311', descr => 'MD5 hash',
   proname => 'md5', proleakproof => 't', prorettype => 'text',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..0d734169f1 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
 uuid_extract_timestamp(uuid)
 uuid_extract_version(uuid)
+crc32(bytea)
+crc32c(bytea)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 52b69a107f..074b0e11e2 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2255,6 +2255,33 @@ SELECT sha512('The quick brown fox jumps over the lazy dog.');
  \x91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed
 (1 row)
 
+--
+-- CRC32
+--
+SELECT crc32(''::bytea);
+   crc32    
+------------
+ \x00000000
+(1 row)
+
+SELECT crc32('The quick brown fox jumps over the lazy dog.'::bytea);
+   crc32    
+------------
+ \x519025e9
+(1 row)
+
+SELECT crc32c(''::bytea);
+   crc32c   
+------------
+ \x00000000
+(1 row)
+
+SELECT crc32c('The quick brown fox jumps over the lazy dog.'::bytea);
+   crc32c   
+------------
+ \x190097b3
+(1 row)
+
 --
 -- encode/decode
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3959678992..f8cd621d9a 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -719,6 +719,15 @@ SELECT sha384('The quick brown fox jumps over the lazy dog.');
 SELECT sha512('');
 SELECT sha512('The quick brown fox jumps over the lazy dog.');
 
+--
+-- CRC32
+--
+SELECT crc32(''::bytea);
+SELECT crc32('The quick brown fox jumps over the lazy dog.'::bytea);
+
+SELECT crc32c(''::bytea);
+SELECT crc32c('The quick brown fox jumps over the lazy dog.'::bytea);
+
 --
 -- encode/decode
 --
-- 
2.45.2

