On Wed, Dec 20, 2023 at 3:23 AM Jeff Davis <pg...@j-davis.com> wrote:
>
> On Tue, 2023-12-19 at 16:23 +0700, John Naylor wrote:
> > That wasn't the next place I thought to look (that would be the
> > strcmp
> > call), but something like this could be worthwhile.
>
> The reason I looked here is that the inner while statement (to find the
> chunk size) looked out of place and possibly slow, and there's a
> bitwise trick we can use instead.

There are other bit tricks we can use. In v11-0005, just for fun, I
translated a couple more into C from

https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
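
For reference, the core trick (introduced as haszero64() in v11-0004)
detects a zero byte in a 64-bit word without inspecting the bytes
individually. A minimal standalone sketch, with a function name of my
own invention:

#include <stdint.h>

/*
 * Classic trick from the bithacks page cited in v11-0004: the result
 * is nonzero iff some byte of v is zero. Subtracting 1 from each byte
 * sets its high bit on a borrow; masking with ~v rejects bytes that
 * merely had their high bit set already.
 */
static inline uint64_t
has_zero_byte(uint64_t v)
{
	return (v - 0x0101010101010101UL) & ~v & 0x8080808080808080UL;
}
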
From 9999aecbc51780ada4634855727c50e3b85a8f7f Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v11 3/5] Use fasthash32 for pgstat_hash_hash_key

Currently this calls the 32-bit Murmur finalizer on each of the three
key elements, then joins the results with hash_combine().  Hashing the
whole key at once with fasthash32 is simpler and has better collision
guarantees.

WIP: Make sure performance is at least comparable.

WIP: We may not need the full 32-bit reducing step in the finalizer.
It would be slightly cheaper to just use fasthash64 and then take
the lower 32 bits.

Discussion: (none yet, buried in a related patchset)
---
 src/include/utils/pgstat_internal.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..ecc46bef04 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
 #define PGSTAT_INTERNAL_H
 
 
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "lib/dshash.h"
 #include "lib/ilist.h"
 #include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
 pgstat_hash_hash_key(const void *d, size_t size, void *arg)
 {
 	const PgStat_HashKey *key = (PgStat_HashKey *) d;
-	uint32		hash;
 
 	Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
 
-	hash = murmurhash32(key->kind);
-	hash = hash_combine(hash, murmurhash32(key->dboid));
-	hash = hash_combine(hash, murmurhash32(key->objoid));
-
-	return hash;
+	return fasthash32((const char *) key, size, 0);
 }
 
 /*
-- 
2.43.0
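
A side note on 0003: hashing the key's raw bytes is only sound because
PgStat_HashKey currently has no padding (three 4-byte members). If that
ever changes, uninitialized padding bytes would be fed to the hash. A
guard along these lines might be worthwhile (a sketch only, not part of
the patch):

/* Hashing the raw key requires that it contain no padding bytes. */
StaticAssertDecl(sizeof(PgStat_HashKey) ==
				 sizeof(PgStat_Kind) + sizeof(Oid) + sizeof(Oid),
				 "PgStat_HashKey has padding; cannot hash raw bytes");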

From c7bd727b24a8935343df6fb24d10948fa6d4d57c Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v11 2/5] Use fasthash for the search path cache

This serves to demonstrate the incremental API, allowing inlined
hash calculation without a strlen call. This brings general-case
performance closer to that of the optimization done in commit a86c61c9ee.

WIP: roleid should be mixed in normally, unless we have
reason to just use it as a seed.

Jeff Davis, with switch to chunked interface by me

Discussion: https://www.postgresql.org/message-id/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
 src/backend/catalog/namespace.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..7fe2fd1fd4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -247,11 +247,25 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 static inline uint32
 spcachekey_hash(SearchPathCacheKey key)
 {
-	const unsigned char *bytes = (const unsigned char *) key.searchPath;
-	int			blen = strlen(key.searchPath);
+	const char *const start = key.searchPath;
+	const char *buf = key.searchPath;
+	fasthash_state hs;
 
-	return hash_combine(hash_bytes(bytes, blen),
-						hash_uint32(key.roleid));
+	/* WIP: maybe roleid should be mixed in normally */
+	fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+	while (*buf)
+	{
+		int			chunk_len = 0;
+
+		while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+			chunk_len++;
+
+		fasthash_accum(&hs, buf, chunk_len);
+		buf += chunk_len;
+	}
+
+	/* pass the length to tweak the final mix */
+	return fasthash_final32(&hs, buf - start);
 }
 
 static inline bool
-- 
2.43.0
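
To make the chunked loop in 0002 concrete: for a 10-byte search path
such as "pg_catalog", the loop boils down to this call sequence (a
sketch in the style of the header's own usage example, with roleid as
the seed per the WIP comment):

fasthash_state hs;

fasthash_init(&hs, FH_UNKNOWN_LENGTH, roleid);
fasthash_accum(&hs, "pg_catal", 8);	/* first full 8-byte chunk */
fasthash_accum(&hs, "og", 2);		/* 2-byte remainder */
return fasthash_final32(&hs, 10);	/* total length tweaks the final mix */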

From ec447cc9a9718421883d9619e9dde1b5df3ada9c Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 20 Dec 2023 13:08:46 +0700
Subject: [PATCH v11 5/5] Optimize tail with inspiration from OpenBSD

This only works on little endian, so add a guard for that and
for 64-bit builds. Word-at-a-time NUL checks are not worth the
extra complexity for 32-bit platforms. There is an algorithm
that works for big endian, but this is all just a demonstration
anyway.
---
 src/backend/catalog/namespace.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index cb840ce9dd..2046d6788d 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -253,20 +253,36 @@ cstring_hash_aligned(const char *str, uint64 seed)
 {
 	const char *const start = str;
 	const char *buf = start;
+	// TODO: this is now really the "remainder" length
 	int chunk_len = 0;
+	uint64 zero_bytes, chunk;
 	fasthash_state hs;
 
 	fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
 
+	// WIP: if this is the common case, we could have an "unlikely" bytewise preamble
 	Assert(PointerIsAligned(start, uint64));
-	while (!haszero64(*(uint64 *)buf))
+	while (true)
 	{
+		chunk = *(uint64 *)buf;
+		zero_bytes = (chunk - 0x0101010101010101UL) & 0x8080808080808080UL;
+
+		// WIP: this is from OpenBSD strlen -- the extra branch is probably not worth it for short strings
+		if (zero_bytes)
+		{
+			// only needed if the input can have the high bit set
+			zero_bytes &= ~chunk;
+			if (zero_bytes)
+				break;
+		}
+		// WIP: since we have the chunk already, maybe just combine it directly?
 		fasthash_accum64(&hs, buf);
 		buf += sizeof(uint64);
 	}
 
-	while (buf[chunk_len] != '\0')
-		chunk_len++;
+	// XXX little endian only: the lowest set marker bit is the first NUL. See
+	// https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
+	chunk_len = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
 	fasthash_accum(&hs, buf, chunk_len);
 	buf += chunk_len;
 
@@ -300,9 +316,11 @@ cstring_hash_unaligned(const char *str, uint64 seed)
 static inline uint32
 spcachekey_hash(SearchPathCacheKey key)
 {
+#if ((SIZEOF_VOIDP == 8) && !defined(WORDS_BIGENDIAN))
 	if (PointerIsAligned(key.searchPath, uint64))
 		return cstring_hash_aligned(key.searchPath, key.roleid);
 	else
+#endif
 		return cstring_hash_unaligned(key.searchPath, key.roleid);
 }
 
-- 
2.43.0
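
To sanity-check the remainder computation in 0005 (a worked example;
little endian assumed, and "????" marks don't-care garbage bytes): if
the final chunk's memory bytes are 'a' 'l' 'l' '\0' ..., then

	chunk      = 0x????????006c6c61
	zero_bytes = 0x????????80000000	(after masking with ~chunk)

The lowest set marker bit is at position 31, so
pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE = 31 / 8 = 3, i.e.
the three bytes preceding the terminator. Bytes past the terminator can
themselves be zero and set higher marker bits, which is why the
rightmost (first in memory) marker is the one to use.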

From a990c20cab3c293a514b0c5120dfb83a3258e666 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v11 1/5] Add inlineable, incremental hash functions for
 in-memory use

A number of places hash NUL-terminated strings. Currently, we need
to call strlen first because hash_bytes needs the length. For short
strings the C library call has a large overhead, and strlen calls
show up prominently in profiles.

Per suggestion from Andres Freund, add hash functions with an
incremental interface. Instead of trying to whack around hash_bytes
while maintaining its current behavior on all platforms, we base
this work on fasthash (MIT licensed) which is simple, faster than
hash_bytes for inputs over 12 bytes long, and also passes the hash
function testing suite SMHasher.

The original functions have been reimplemented using our new
incremental interface to validate that this method will still give the
same answer, provided we have the input length ahead of time. Future
work will use these for some existing uses of simplehash and dynahash.

The new functionality lives in a new header hashfn_unstable.h. The
name implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being endian-independent.

Reviewed (in an earlier version) by Heikki Linnakangas

Discussion: https://www.postgresql.org/message-id/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
 src/include/common/hashfn_unstable.h | 213 +++++++++++++++++++++++++++
 src/tools/pgindent/typedefs.list     |   1 +
 2 files changed, 214 insertions(+)
 create mode 100644 src/include/common/hashfn_unstable.h

diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..bf1dbee28d
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,213 @@
+/*
+ * Building blocks for creating fast inlineable hash functions.
+ *
+ * The "unstable" designation is in contrast to hashfn.h, which cannot
+ * break compatibility because hashes can be written to disk and so
+ * must produce the same hashes between versions.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+   Copyright (C) 2012 Zilong Tan (eric.zl...@gmail.com)
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ *    ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+	uint64		accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+	uint64		hash;
+} fasthash_state;
+
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+	memset(hs, 0, sizeof(fasthash_state));
+	hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+	h ^= (h >> 23) + tweak;
+	h *= 0x2127599bf4325c37;
+	h ^= h >> 47;
+	return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+	hs->hash ^= fasthash_mix(hs->accum, 0);
+	hs->hash *= 0x880355f21e6d1965;
+
+	/* reset hash state for next input */
+	hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+	Assert(hs->accum == 0);
+	Assert(len <= FH_SIZEOF_ACCUM);
+
+	switch (len)
+	{
+		case 8:
+			memcpy(&hs->accum, k, 8);
+			break;
+		case 7:
+			hs->accum |= (uint64) k[6] << 48;
+			/* FALLTHROUGH */
+		case 6:
+			hs->accum |= (uint64) k[5] << 40;
+			/* FALLTHROUGH */
+		case 5:
+			hs->accum |= (uint64) k[4] << 32;
+			/* FALLTHROUGH */
+		case 4:
+			hs->accum |= (uint64) k[3] << 24;
+			/* FALLTHROUGH */
+		case 3:
+			hs->accum |= (uint64) k[2] << 16;
+			/* FALLTHROUGH */
+		case 2:
+			hs->accum |= (uint64) k[1] << 8;
+			/* FALLTHROUGH */
+		case 1:
+			hs->accum |= (uint64) k[0];
+			break;
+		case 0:
+			return;
+	}
+
+	fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+	return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit of additional mixing compared to
+ * just taking the lower 32 bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+	/*
+	 * The following trick converts the 64-bit hashcode to a Fermat
+	 * residue, which retains information from both the higher and
+	 * lower parts of the hashcode.
+	 */
+	return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+	return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+	fasthash_state hs;
+
+	fasthash_init(&hs, len, seed);
+
+	while (len >= FH_SIZEOF_ACCUM)
+	{
+		fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+		k += FH_SIZEOF_ACCUM;
+		len -= FH_SIZEOF_ACCUM;
+	}
+
+	fasthash_accum(&hs, k, len);
+	return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint32
+fasthash32(const char *k, int len, uint64 seed)
+{
+	return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif							/* HASHFN_UNSTABLE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d659adbfd6..4038d07458 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3321,6 +3321,7 @@ exec_thread_arg
 execution_state
 explain_get_index_name_hook_type
 f_smgr
+fasthash_state
 fd_set
 fe_scram_state
 fe_scram_state_enum
-- 
2.43.0
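
A quick way to convince yourself that the incremental interface in 0001
gives the same answer as the one-shot function (a throwaway test
sketch, not part of the patch):

const char *s = "search_path";	/* 11 bytes */
int			len = strlen(s);
fasthash_state hs;
uint32		h1,
			h2;

h1 = fasthash32(s, len, 0);

fasthash_init(&hs, len, 0);
fasthash_accum(&hs, s, FH_SIZEOF_ACCUM);	/* "search_p" */
fasthash_accum(&hs, s + 8, len - 8);		/* "ath" */
h2 = fasthash_final32(&hs, 0);

Assert(h1 == h2);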

From d5ec732a544ad6f6de0a42622d1656003b3dc351 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 20 Dec 2023 11:40:11 +0700
Subject: [PATCH v11 4/5] Jeff Davis v10jd-0004

---
 src/backend/catalog/namespace.c      | 46 +++++++++++++++++++++++++---
 src/include/common/hashfn_unstable.h |  9 ++++++
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 7fe2fd1fd4..cb840ce9dd 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -244,15 +244,44 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
  * to initialize a key, and also offers a more convenient API.
  */
 
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+	(((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
 static inline uint32
-spcachekey_hash(SearchPathCacheKey key)
+cstring_hash_aligned(const char *str, uint64 seed)
+{
+	const char *const start = str;
+	const char *buf = start;
+	int chunk_len = 0;
+	fasthash_state hs;
+
+	fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+
+	Assert(PointerIsAligned(start, uint64));
+	while (!haszero64(*(uint64 *)buf))
+	{
+		fasthash_accum64(&hs, buf);
+		buf += sizeof(uint64);
+	}
+
+	while (buf[chunk_len] != '\0')
+		chunk_len++;
+	fasthash_accum(&hs, buf, chunk_len);
+	buf += chunk_len;
+
+	return fasthash_final32(&hs, buf - start);
+}
+
+static inline uint32
+cstring_hash_unaligned(const char *str, uint64 seed)
 {
-	const char *const start = key.searchPath;
-	const char *buf = key.searchPath;
+	const char *const start = str;
+	const char *buf = str;
 	fasthash_state hs;
 
 	/* WIP: maybe roleid should be mixed in normally */
-	fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+	fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
 	while (*buf)
 	{
 		int			chunk_len = 0;
@@ -268,6 +297,15 @@ spcachekey_hash(SearchPathCacheKey key)
 	return fasthash_final32(&hs, buf - start);
 }
 
+static inline uint32
+spcachekey_hash(SearchPathCacheKey key)
+{
+	if (PointerIsAligned(key.searchPath, uint64))
+		return cstring_hash_aligned(key.searchPath, key.roleid);
+	else
+		return cstring_hash_unaligned(key.searchPath, key.roleid);
+}
+
 static inline bool
 spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
 {
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index bf1dbee28d..553fab0415 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -105,6 +105,15 @@ fasthash_combine(fasthash_state *hs)
 	hs->accum = 0;
 }
 
+/* Accumulate 8 bytes from an aligned pointer and combine them into the hash */
+static inline void
+fasthash_accum64(fasthash_state *hs, const char *ptr)
+{
+	Assert(PointerIsAligned(ptr, uint64));
+	hs->accum = *(uint64 *)ptr;
+	fasthash_combine(hs);
+}
+
 /* Accumulate up to 8 bytes of input and combine it into the hash */
 static inline void
 fasthash_accum(fasthash_state *hs, const char *k, int len)
-- 
2.43.0
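
One more note, on the "& ~(v)" term in haszero64() from 0004: without
it, bytes with the high bit set can produce false positives. A worked
single-byte example (values mine):

/*
 * v = 0x81: v - 0x01 = 0x80, so the high bit is set even though the
 * byte is nonzero; ~v = 0x7e clears it: 0x80 & 0x7e & 0x80 == 0.
 * v = 0x00: v - 0x01 wraps to 0xff; ~v = 0xff keeps the high bit:
 * 0xff & 0xff & 0x80 == 0x80, a true hit.
 */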
