I wrote:

> Thinking some more, I'm not quite comfortable with the number of
> places in these patches that have to know about the pre-downcased
> strings, or whether we need that in the first place. If lower case is
> common enough to optimize for, it seems the equality function can just
> check strict equality on the char and only on mismatch try downcasing
> before returning false. Doing our own function would allow the
> compiler to inline it, or at least keep it on the same page. Further,
> the old hash function shouldn't need to branch to do the same
> downcasing, since hashing is lossy anyway. In the keyword hashes, we
> just do "*ch |= 0x20", which downcases letters and turns undercores to
> DEL. I can take a stab at that later.

v4 is a quick POC for that. I haven't verified that it's correct for
the case of the probe and the entry don't match, but in case it
doesn't it should be easy to fix. I also didn't bother with
SH_STORE_HASH in my testing.

0001 adds the murmur32 finalizer -- we should do that regardless of
anything else in this thread.
0002 is just Jeff's 0001
0003 adds an equality function that downcases lazily, and teaches the
hash function about the 0x20 trick.

master:
latency average = 581.765 ms

v3 0001-0005:
latency average = 544.576 ms

v4 0001-0003:
latency average = 547.489 ms

This gives similar results with a tiny amount of code (excluding the
simplehash conversion). I didn't check if the compiler inlined these
functions, but we can hint it if necessary. We could use the new
equality function in all the call sites that currently test for
"guc_name_compare() == 0", in which case it might not end up inlined,
but that's probably okay.

We could also try to improve the hash function's collision behavior by
collecting the bytes on a uint64 and calling our new murmur64 before
returning the lower half, but that's speculative.
From 1a516bb341afb72680470897d75c1d23f75fb37e Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 22 Nov 2023 17:28:41 +0700
Subject: [PATCH v4 1/3] Add finalizer to guc_name_hash

---
 src/backend/utils/misc/guc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 82d8efbc96..e3834d52ee 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
 #include "catalog/objectaccess.h"
 #include "catalog/pg_authid.h"
 #include "catalog/pg_parameter_acl.h"
+#include "common/hashfn.h"
 #include "guc_internal.h"
 #include "libpq/pqformat.h"
 #include "parser/scansup.h"
@@ -1339,7 +1340,7 @@ guc_name_hash(const void *key, Size keysize)
 		result = pg_rotate_left32(result, 5);
 		result ^= (uint32) ch;
 	}
-	return result;
+	return murmurhash32(result);
 }
 
 /*
-- 
2.42.0

From 01b053b473d897c71725a7bb09a3127fc78140dc Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 22 Nov 2023 18:45:48 +0700
Subject: [PATCH v4 3/3] Optimize GUC functions for simple hash

Only downcase a character when an equality check fails,
since we expect most names to be lower case. Also simplify
downcasing in the hash function.
---
 src/backend/utils/misc/guc.c | 40 ++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index bf05b022c3..7896deb63b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -229,6 +229,7 @@ static int	GUCNestLevel = 0;	/* 1 when in main transaction */
 
 
 static int	guc_var_compare(const void *a, const void *b);
+static bool guc_name_eq(const char *namea, const char *nameb);
 static uint32 guc_name_hash(const char *name);
 static void InitializeGUCOptionsFromEnvironment(void);
 static void InitializeOneGUCOption(struct config_generic *gconf);
@@ -271,7 +272,7 @@ static bool call_enum_check_hook(struct config_enum *conf, int *newval,
 #define SH_KEY_TYPE		const char *
 #define	SH_KEY			gucname
 #define SH_HASH_KEY(tb, key)   	guc_name_hash(key)
-#define SH_EQUAL(tb, a, b)		(guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b)		(guc_name_eq(a, b))
 #define	SH_SCOPE		static inline
 #define SH_DECLARE
 #define SH_DEFINE
@@ -1308,6 +1309,38 @@ guc_name_compare(const char *namea, const char *nameb)
 	return 0;
 }
 
+static bool
+guc_name_eq(const char *namea, const char *nameb)
+{
+	char		cha;
+	char		chb;
+
+	while (*namea && *nameb)
+	{
+		cha = *namea++;
+		chb = *nameb++;
+
+		if (cha != chb)
+		{
+			/* Casefold lazily since we expect lower case */
+			if (cha >= 'A' && cha <= 'Z')
+				cha += 'a' - 'A';
+			if (chb >= 'A' && chb <= 'Z')
+				chb += 'a' - 'A';
+
+			if (cha != chb)
+				return false;
+		}
+	}
+
+	if (*namea == *nameb)
+		return true;
+	else
+		return false;
+
+	//Assert(guc_name_compare(namea, nameb) == 0);
+}
+
 /*
  * Hash function that's compatible with guc_name_compare
  */
@@ -1320,9 +1353,8 @@ guc_name_hash(const char *name)
 	{
 		char		ch = *name++;
 
-		/* Case-fold in the same way as guc_name_compare */
-		if (ch >= 'A' && ch <= 'Z')
-			ch += 'a' - 'A';
+		/* quick and dirty casefolding suitable for hashing */
+		ch |= 0x20;
 
 		/* Merge into hash ... not very bright, but it needn't be */
 		result = pg_rotate_left32(result, 5);
-- 
2.42.0

From 3db510405e03be41dfad34b82069b36586591f42 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 22 Nov 2023 17:35:18 +0700
Subject: [PATCH v4 2/3] Convert GUC hashtable to use simplehash

---
 src/backend/utils/misc/guc.c | 141 ++++++++++++++---------------------
 1 file changed, 56 insertions(+), 85 deletions(-)

diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e3834d52ee..bf05b022c3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -203,9 +203,10 @@ typedef struct
 {
 	const char *gucname;		/* hash key */
 	struct config_generic *gucvar;	/* -> GUC's defining structure */
-} GUCHashEntry;
 
-static HTAB *guc_hashtab;		/* entries are GUCHashEntrys */
+	/* needed by simplehash */
+	char status;
+} GUCHashEntry;
 
 /*
  * In addition to the hash table, variables having certain properties are
@@ -228,8 +229,7 @@ static int	GUCNestLevel = 0;	/* 1 when in main transaction */
 
 
 static int	guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int	guc_name_match(const void *key1, const void *key2, Size keysize);
+static uint32 guc_name_hash(const char *name);
 static void InitializeGUCOptionsFromEnvironment(void);
 static void InitializeOneGUCOption(struct config_generic *gconf);
 static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -266,6 +266,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
 static bool call_enum_check_hook(struct config_enum *conf, int *newval,
 								 void **extra, GucSource source, int elevel);
 
+#define SH_PREFIX		GUCHash
+#define SH_ELEMENT_TYPE	GUCHashEntry
+#define SH_KEY_TYPE		const char *
+#define	SH_KEY			gucname
+#define SH_HASH_KEY(tb, key)   	guc_name_hash(key)
+#define SH_EQUAL(tb, a, b)		(guc_name_compare(a, b) == 0)
+#define	SH_SCOPE		static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL;	/* entries are GUCHashEntrys */
 
 /*
  * This function handles both actual config file (re)loads and execution of
@@ -283,7 +295,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
 	ConfigVariable *item,
 			   *head,
 			   *tail;
-	HASH_SEQ_STATUS status;
+	GUCHash_iterator iter;
 	GUCHashEntry *hentry;
 
 	/* Parse the main config file into a list of option names and values */
@@ -359,8 +371,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
 	 * need this so that we can tell below which ones have been removed from
 	 * the file since we last processed it.
 	 */
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 	{
 		struct config_generic *gconf = hentry->gucvar;
 
@@ -446,8 +458,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
 	 * boot-time defaults.  If such a variable can't be changed after startup,
 	 * report that and continue.
 	 */
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 	{
 		struct config_generic *gconf = hentry->gucvar;
 		GucStack   *stack;
@@ -868,17 +880,17 @@ struct config_generic **
 get_guc_variables(int *num_vars)
 {
 	struct config_generic **result;
-	HASH_SEQ_STATUS status;
+	GUCHash_iterator iter;
 	GUCHashEntry *hentry;
 	int			i;
 
-	*num_vars = hash_get_num_entries(guc_hashtab);
+	*num_vars = guc_hashtab->members;
 	result = palloc(sizeof(struct config_generic *) * *num_vars);
 
 	/* Extract pointers from the hash table */
 	i = 0;
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 		result[i++] = hentry->gucvar;
 	Assert(i == *num_vars);
 
@@ -900,7 +912,6 @@ build_guc_variables(void)
 {
 	int			size_vars;
 	int			num_vars = 0;
-	HASHCTL		hash_ctl;
 	GUCHashEntry *hentry;
 	bool		found;
 	int			i;
@@ -962,24 +973,14 @@ build_guc_variables(void)
 	 */
 	size_vars = num_vars + num_vars / 4;
 
-	hash_ctl.keysize = sizeof(char *);
-	hash_ctl.entrysize = sizeof(GUCHashEntry);
-	hash_ctl.hash = guc_name_hash;
-	hash_ctl.match = guc_name_match;
-	hash_ctl.hcxt = GUCMemoryContext;
-	guc_hashtab = hash_create("GUC hash table",
-							  size_vars,
-							  &hash_ctl,
-							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+	guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
 
 	for (i = 0; ConfigureNamesBool[i].gen.name; i++)
 	{
 		struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
 
-		hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-											  &gucvar->name,
-											  HASH_ENTER,
-											  &found);
+		hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
 		Assert(!found);
 		hentry->gucvar = gucvar;
 	}
@@ -988,10 +989,8 @@ build_guc_variables(void)
 	{
 		struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
 
-		hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-											  &gucvar->name,
-											  HASH_ENTER,
-											  &found);
+		hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
 		Assert(!found);
 		hentry->gucvar = gucvar;
 	}
@@ -1000,10 +999,8 @@ build_guc_variables(void)
 	{
 		struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
 
-		hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-											  &gucvar->name,
-											  HASH_ENTER,
-											  &found);
+		hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
 		Assert(!found);
 		hentry->gucvar = gucvar;
 	}
@@ -1012,10 +1009,8 @@ build_guc_variables(void)
 	{
 		struct config_generic *gucvar = &ConfigureNamesString[i].gen;
 
-		hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-											  &gucvar->name,
-											  HASH_ENTER,
-											  &found);
+		hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
 		Assert(!found);
 		hentry->gucvar = gucvar;
 	}
@@ -1024,15 +1019,13 @@ build_guc_variables(void)
 	{
 		struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
 
-		hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-											  &gucvar->name,
-											  HASH_ENTER,
-											  &found);
+		hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
 		Assert(!found);
 		hentry->gucvar = gucvar;
 	}
 
-	Assert(num_vars == hash_get_num_entries(guc_hashtab));
+	Assert(num_vars == guc_hashtab->members);
 }
 
 /*
@@ -1045,10 +1038,8 @@ add_guc_variable(struct config_generic *var, int elevel)
 	GUCHashEntry *hentry;
 	bool		found;
 
-	hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-										  &var->name,
-										  HASH_ENTER_NULL,
-										  &found);
+	hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
 	if (unlikely(hentry == NULL))
 	{
 		ereport(elevel,
@@ -1237,10 +1228,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
 	Assert(name);
 
 	/* Look it up using the hash table. */
-	hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-										  &name,
-										  HASH_FIND,
-										  NULL);
+	hentry = GUCHash_lookup(guc_hashtab, name);
+
 	if (hentry)
 		return hentry->gucvar;
 
@@ -1323,10 +1312,9 @@ guc_name_compare(const char *namea, const char *nameb)
  * Hash function that's compatible with guc_name_compare
  */
 static uint32
-guc_name_hash(const void *key, Size keysize)
+guc_name_hash(const char *name)
 {
 	uint32		result = 0;
-	const char *name = *(const char *const *) key;
 
 	while (*name)
 	{
@@ -1343,19 +1331,6 @@ guc_name_hash(const void *key, Size keysize)
 	return murmurhash32(result);
 }
 
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
-	const char *name1 = *(const char *const *) key1;
-	const char *name2 = *(const char *const *) key2;
-
-	return guc_name_compare(name1, name2);
-}
-
-
 /*
  * Convert a GUC name to the form that should be used in pg_parameter_acl.
  *
@@ -1525,7 +1500,7 @@ check_GUC_init(struct config_generic *gconf)
 void
 InitializeGUCOptions(void)
 {
-	HASH_SEQ_STATUS status;
+	GUCHash_iterator iter;
 	GUCHashEntry *hentry;
 
 	/*
@@ -1543,8 +1518,8 @@ InitializeGUCOptions(void)
 	 * Load all variables with their compiled-in defaults, and initialize
 	 * status fields as needed.
 	 */
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 	{
 		/* Check mapping between initial and default value */
 		Assert(check_GUC_init(hentry->gucvar));
@@ -2529,7 +2504,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
 void
 BeginReportingGUCOptions(void)
 {
-	HASH_SEQ_STATUS status;
+	GUCHash_iterator iter;
 	GUCHashEntry *hentry;
 
 	/*
@@ -2553,8 +2528,8 @@ BeginReportingGUCOptions(void)
 						PGC_INTERNAL, PGC_S_OVERRIDE);
 
 	/* Transmit initial values of interesting variables */
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 	{
 		struct config_generic *conf = hentry->gucvar;
 
@@ -4810,10 +4785,8 @@ define_custom_variable(struct config_generic *variable)
 	/*
 	 * See if there's a placeholder by the same name.
 	 */
-	hentry = (GUCHashEntry *) hash_search(guc_hashtab,
-										  &name,
-										  HASH_FIND,
-										  NULL);
+	hentry = GUCHash_lookup(guc_hashtab, name);
+
 	if (hentry == NULL)
 	{
 		/*
@@ -5149,7 +5122,7 @@ void
 MarkGUCPrefixReserved(const char *className)
 {
 	int			classLen = strlen(className);
-	HASH_SEQ_STATUS status;
+	GUCHash_iterator iter;
 	GUCHashEntry *hentry;
 	MemoryContext oldcontext;
 
@@ -5159,8 +5132,8 @@ MarkGUCPrefixReserved(const char *className)
 	 * don't bother trying to free associated memory, since this shouldn't
 	 * happen often.)
 	 */
-	hash_seq_init(&status, guc_hashtab);
-	while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+	GUCHash_start_iterate(guc_hashtab, &iter);
+	while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
 	{
 		struct config_generic *var = hentry->gucvar;
 
@@ -5175,10 +5148,8 @@ MarkGUCPrefixReserved(const char *className)
 					 errdetail("\"%s\" is now a reserved prefix.",
 							   className)));
 			/* Remove it from the hash table */
-			hash_search(guc_hashtab,
-						&var->name,
-						HASH_REMOVE,
-						NULL);
+			GUCHash_delete(guc_hashtab, var->name);
+
 			/* Remove it from any lists it's in, too */
 			RemoveGUCFromLists(var);
 		}
@@ -5209,7 +5180,7 @@ get_explain_guc_options(int *num)
 	 * While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
 	 * it doesn't seem worth dynamically resizing this array.
 	 */
-	result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+	result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
 
 	/* We need only consider GUCs with source not PGC_S_DEFAULT */
 	dlist_foreach(iter, &guc_nondef_list)
-- 
2.42.0

Reply via email to