On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aa...@cybertec.at> wrote:
>
> But given that we know the data length and we have it in a register
> already, it's easy enough to just mask out data past the end with a
> shift. See patch 1. Performance benefit is about 1.5x Measured on a
> small test harness that just hashes and finalizes an array of strings,
> with a data dependency between consecutive hashes (next address
> depends on the previous hash output).

I pushed this with a couple cosmetic adjustments, after fixing the
endianness issue. I'm not sure why valgrind is fine with this way, and
the other ways I tried forming the (little-endian) mask raised errors.
In addition to "zero_byte_low | (zero_byte_low - 1)", I tried
"~zero_byte_low & (zero_byte_low - 1)" and "zero_byte_low ^
(zero_byte_low - 1)" to no avail.

On Thu, Mar 28, 2024 at 12:37 PM Jeff Davis <pg...@j-davis.com> wrote:
> 0001 looks good to me, thank you.
>
> > v21-0003 adds a new file hashfn_unstable.c for convenience functions
> > and converts all the duplicate frontend uses of hash_string_pointer.
>
> Why not make hash_string() inline, too? I'm fine with it either way,
> I'm just curious why you went to the trouble to create a new .c file so
> it didn't have to be inlined.

Thanks for looking! I pushed these, with hash_string() inlined.

I've attached (not reindented for clarity) an update of something
mentioned a few times already -- removing strlen calls for dynahash
and dshash string keys. I'm not quite sure how the comments should be
updated about string_hash being deprecated to call directly. This
patch goes further and semi-deprecates calling it at all, so these
comments seem a bit awkward now.
From 2e41e683b2fe2bc76b808e58ca2fea9067bff4e1 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 5 Apr 2024 13:59:13 +0700
Subject: [PATCH v21] Use fasthash for string keys in dynahash and dshash

This avoids strlen calls. string_hash is kept around in case
extensions are using it.
---
 src/backend/lib/dshash.c             |  5 +++--
 src/backend/utils/hash/dynahash.c    | 25 ++++++++++++++++-----
 src/common/hashfn.c                  |  4 +++-
 src/include/common/hashfn_unstable.h | 33 ++++++++++++++++++++++++++++
 src/include/lib/dshash.h             |  2 +-
 5 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c
index 93a9e21ddd..8bebf92cb8 100644
--- a/src/backend/lib/dshash.c
+++ b/src/backend/lib/dshash.c
@@ -32,6 +32,7 @@
 #include "postgres.h"
 
 #include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "lib/dshash.h"
 #include "storage/lwlock.h"
 #include "utils/dsa.h"
@@ -605,14 +606,14 @@ dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
 }
 
 /*
- * A hash function that forwards to string_hash.
+ * A hash function that forwards to hash_string_with_len.
  */
 dshash_hash
 dshash_strhash(const void *v, size_t size, void *arg)
 {
 	Assert(strlen((const char *) v) < size);
 
-	return string_hash((const char *) v, size);
+	return hash_string_with_len((const char *) v, size - 1);
 }
 
 /*
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 145e058fe6..9b85af2743 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
 
 #include "access/xact.h"
 #include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "port/pg_bitutils.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
@@ -309,6 +310,18 @@ string_compare(const char *key1, const char *key2, Size keysize)
 	return strncmp(key1, key2, keysize - 1);
 }
 
+/*
+ * hash function used when HASH_STRINGS is set
+ *
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+static uint32
+default_string_hash(const void *key, Size keysize)
+{
+	return hash_string_with_len((const char *) key, keysize - 1);
+}
 
 /************************** CREATE ROUTINES **********************/
 
@@ -420,8 +433,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	else
 	{
 		/*
-		 * string_hash used to be considered the default hash method, and in a
-		 * non-assert build it effectively still is.  But we now consider it
+		 * string_hash used to be considered the default hash method, and
+		 * it effectively still was until version 17.  Since version 14 we consider it
 		 * an assertion error to not say HASH_STRINGS explicitly.  To help
 		 * catch mistaken usage of HASH_STRINGS, we also insist on a
 		 * reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -430,12 +443,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 		Assert(flags & HASH_STRINGS);
 		Assert(info->keysize > 8);
 
-		hashp->hash = string_hash;
+		hashp->hash = default_string_hash;
 	}
 
 	/*
 	 * If you don't specify a match function, it defaults to string_compare if
-	 * you used string_hash, and to memcmp otherwise.
+	 * you used default_string_hash, and to memcmp otherwise.
 	 *
 	 * Note: explicitly specifying string_hash is deprecated, because this
 	 * might not work for callers in loadable modules on some platforms due to
@@ -444,7 +457,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	 */
 	if (flags & HASH_COMPARE)
 		hashp->match = info->match;
-	else if (hashp->hash == string_hash)
+	else if (hashp->hash == default_string_hash)
 		hashp->match = (HashCompareFunc) string_compare;
 	else
 		hashp->match = memcmp;
@@ -454,7 +467,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	 */
 	if (flags & HASH_KEYCOPY)
 		hashp->keycopy = info->keycopy;
-	else if (hashp->hash == string_hash)
+	else if (hashp->hash == default_string_hash)
 	{
 		/*
 		 * The signature of keycopy is meant for memcpy(), which returns
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 4db468cf85..5cfa4815a1 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -654,7 +654,9 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed)
 /*
  * string_hash: hash function for keys that are NUL-terminated strings.
  *
- * NOTE: this is the default hash function if none is specified.
+ * NOTE: this was the default string hash for dynahash until version 17,
+ * and is now here only for backward compatibility. It's more efficient
+ * to use hash_string_with_len instead.
  */
 uint32
 string_hash(const void *key, Size keysize)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 7b647470ab..38abf690a0 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -450,4 +450,37 @@ hash_string(const char *s)
 	return fasthash_final32(&hs, s_len);
 }
 
+/*
+ * hash_string_with_len: Like hash_string but only hashes
+ * 'len' bytes. Callers will likely want to 'len' to be
+ * one less than the space they have in mind, to leave
+ * room for the NUL terminator.
+ */
+static inline uint32
+hash_string_with_len(const char *s, Size len)
+{
+	fasthash_state hs;
+	size_t		s_len = 0;
+
+	fasthash_init(&hs, 0);
+
+	while (*s && s_len < len)
+	{
+		int			chunk_len = 0;
+
+		while (s[chunk_len] != '\0' &&
+			   s_len < len &&
+			   chunk_len < FH_SIZEOF_ACCUM)
+		{
+			chunk_len++;
+			s_len++;
+		}
+
+		fasthash_accum(&hs, s, chunk_len);
+		s += chunk_len;
+	}
+
+	return fasthash_final32(&hs, s_len);
+}
+
 #endif							/* HASHFN_UNSTABLE_H */
diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h
index 7dda269b75..b72903c02e 100644
--- a/src/include/lib/dshash.h
+++ b/src/include/lib/dshash.h
@@ -117,7 +117,7 @@ extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg);
 extern void dshash_memcpy(void *dest, const void *src, size_t size, void *arg);
 
 /*
- * Convenience hash, compare, and copy functions wrapping strcmp, string_hash,
+ * Convenience hash, compare, and copy functions wrapping strcmp, hash_string_with_len,
  * and strcpy.
  */
 extern int	dshash_strcmp(const void *a, const void *b, size_t size, void *arg);
-- 
2.44.0

Reply via email to