On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aa...@cybertec.at> wrote: > > But given that we know the data length and we have it in a register > already, it's easy enough to just mask out data past the end with a > shift. See patch 1. Performance benefit is about 1.5x Measured on a > small test harness that just hashes and finalizes an array of strings, > with a data dependency between consecutive hashes (next address > depends on the previous hash output).
I pushed this with a couple cosmetic adjustments, after fixing the endianness issue. I'm not sure why valgrind is fine with this way, and the other ways I tried forming the (little-endian) mask raised errors. In addition to "zero_byte_low | (zero_byte_low - 1)", I tried "~zero_byte_low & (zero_byte_low - 1)" and "zero_byte_low ^ (zero_byte_low - 1)" to no avail. On Thu, Mar 28, 2024 at 12:37 PM Jeff Davis <pg...@j-davis.com> wrote: > 0001 looks good to me, thank you. > > > v21-0003 adds a new file hashfn_unstable.c for convenience functions > > and converts all the duplicate frontend uses of hash_string_pointer. > > Why not make hash_string() inline, too? I'm fine with it either way, > I'm just curious why you went to the trouble to create a new .c file so > it didn't have to be inlined. Thanks for looking! I pushed these, with hash_string() inlined. I've attached (not reindented for clarity) an update of something mentioned a few times already -- removing strlen calls for dynahash and dshash string keys. I'm not quite sure how the comments should be updated about string_hash being deprecated to call directly. This patch goes further and semi-deprecates calling it at all, so these comments seem a bit awkward now.
From 2e41e683b2fe2bc76b808e58ca2fea9067bff4e1 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Fri, 5 Apr 2024 13:59:13 +0700 Subject: [PATCH v21] Use fasthash for string keys in dynahash and dshash This avoids strlen calls. string_hash is kept around in case extensions are using it. --- src/backend/lib/dshash.c | 5 +++-- src/backend/utils/hash/dynahash.c | 25 ++++++++++++++++----- src/common/hashfn.c | 4 +++- src/include/common/hashfn_unstable.h | 33 ++++++++++++++++++++++++++++ src/include/lib/dshash.h | 2 +- 5 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c index 93a9e21ddd..8bebf92cb8 100644 --- a/src/backend/lib/dshash.c +++ b/src/backend/lib/dshash.c @@ -32,6 +32,7 @@ #include "postgres.h" #include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "lib/dshash.h" #include "storage/lwlock.h" #include "utils/dsa.h" @@ -605,14 +606,14 @@ dshash_strcmp(const void *a, const void *b, size_t size, void *arg) } /* - * A hash function that forwards to string_hash. + * A hash function that forwards to hash_string_with_len. */ dshash_hash dshash_strhash(const void *v, size_t size, void *arg) { Assert(strlen((const char *) v) < size); - return string_hash((const char *) v, size); + return hash_string_with_len((const char *) v, size - 1); } /* diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 145e058fe6..9b85af2743 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -98,6 +98,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -309,6 +310,18 @@ string_compare(const char *key1, const char *key2, Size keysize) return strncmp(key1, key2, keysize - 1); } +/* + * hash function used when HASH_STRINGS is set + * + * If the string exceeds keysize-1 bytes, we want to hash only that many, + * because when it is copied into the hash table it will be truncated at + * that length. + */ +static uint32 +default_string_hash(const void *key, Size keysize) +{ + return hash_string_with_len((const char *) key, keysize - 1); +} /************************** CREATE ROUTINES **********************/ @@ -420,8 +433,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) else { /* - * string_hash used to be considered the default hash method, and in a - * non-assert build it effectively still is. But we now consider it + * string_hash used to be considered the default hash method, and + * it effectively still was until version 17. Since version 14 we consider it * an assertion error to not say HASH_STRINGS explicitly. To help * catch mistaken usage of HASH_STRINGS, we also insist on a * reasonably long string length: if the keysize is only 4 or 8 bytes, @@ -430,12 +443,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) Assert(flags & HASH_STRINGS); Assert(info->keysize > 8); - hashp->hash = string_hash; + hashp->hash = default_string_hash; } /* * If you don't specify a match function, it defaults to string_compare if - * you used string_hash, and to memcmp otherwise. + * you used default_string_hash, and to memcmp otherwise. * * Note: explicitly specifying string_hash is deprecated, because this * might not work for callers in loadable modules on some platforms due to @@ -444,7 +457,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_COMPARE) hashp->match = info->match; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) hashp->match = (HashCompareFunc) string_compare; else hashp->match = memcmp; @@ -454,7 +467,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_KEYCOPY) hashp->keycopy = info->keycopy; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) { /* * The signature of keycopy is meant for memcpy(), which returns diff --git a/src/common/hashfn.c b/src/common/hashfn.c index 4db468cf85..5cfa4815a1 100644 --- a/src/common/hashfn.c +++ b/src/common/hashfn.c @@ -654,7 +654,9 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed) /* * string_hash: hash function for keys that are NUL-terminated strings. * - * NOTE: this is the default hash function if none is specified. + * NOTE: this was the default string hash for dynahash until version 17, + * and is now here only for backward compatibility. It's more efficient + * to use hash_string_with_len instead. */ uint32 string_hash(const void *key, Size keysize) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 7b647470ab..38abf690a0 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -450,4 +450,37 @@ hash_string(const char *s) return fasthash_final32(&hs, s_len); } +/* + * hash_string_with_len: Like hash_string but only hashes + * 'len' bytes. Callers will likely want to 'len' to be + * one less than the space they have in mind, to leave + * room for the NUL terminator. + */ +static inline uint32 +hash_string_with_len(const char *s, Size len) +{ + fasthash_state hs; + size_t s_len = 0; + + fasthash_init(&hs, 0); + + while (*s && s_len < len) + { + int chunk_len = 0; + + while (s[chunk_len] != '\0' && + s_len < len && + chunk_len < FH_SIZEOF_ACCUM) + { + chunk_len++; + s_len++; + } + + fasthash_accum(&hs, s, chunk_len); + s += chunk_len; + } + + return fasthash_final32(&hs, s_len); +} + #endif /* HASHFN_UNSTABLE_H */ diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h index 7dda269b75..b72903c02e 100644 --- a/src/include/lib/dshash.h +++ b/src/include/lib/dshash.h @@ -117,7 +117,7 @@ extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg); extern void dshash_memcpy(void *dest, const void *src, size_t size, void *arg); /* - * Convenience hash, compare, and copy functions wrapping strcmp, string_hash, + * Convenience hash, compare, and copy functions wrapping strcmp, hash_string_with_len, * and strcpy. */ extern int dshash_strcmp(const void *a, const void *b, size_t size, void *arg); -- 2.44.0