Re: Faster "SET search_path"

2024-07-08 Thread Noah Misch
On Mon, Jul 08, 2024 at 04:39:21PM -0700, Jeff Davis wrote:
> On Sun, 2024-06-30 at 15:30 -0700, Noah Misch wrote:
> You're caching the result of object_aclcheck(NamespaceRelationId,
> ...), so pg_auth_members changes
> 
> Thank you for the report.
> 
> Question: to check for changes to pg_auth_members, it seems like either
> AUTHMEMROLEMEM or AUTHMEMMEMROLE work, and to use both would be
> redundant. Am I missing something, or should I just pick one
> arbitrarily (or by some convention)?

One suffices.  initialize_acl() is the only other place that invals on this
catalog, so consider it a fine convention to mimic.

> >  and pg_database changes also need to invalidate this
> > cache.  (pg_database affects the ACL_CREATE_TEMP case in
> > pg_namespace_aclmask_ext()
> 
> I am having trouble finding an actual problem with ACL_CREATE_TEMP.
> search_path ACL checks are normally bypassed for the temp namespace, so
> it can only happen when the actual temp namespace name (e.g.
> pg_temp_NNN) is explicitly included. In that case, the mask is
> ACL_USAGE, so the two branches in pg_namespace_aclmask_ext() are
> equivalent, right?

I don't know.

> This question is not terribly important for the fix, because
> invalidating for each pg_database change is still necessary as you
> point out




Re: Faster "SET search_path"

2024-07-08 Thread Jeff Davis
On Sun, 2024-06-30 at 15:30 -0700, Noah Misch wrote:
> You're caching the result of object_aclcheck(NamespaceRelationId,
> ...), so pg_auth_members changes

Thank you for the report.

Question: to check for changes to pg_auth_members, it seems like either
AUTHMEMROLEMEM or AUTHMEMMEMROLE work, and to use both would be
redundant. Am I missing something, or should I just pick one
arbitrarily (or by some convention)?

>  and pg_database changes also need to invalidate this
> cache.  (pg_database affects the ACL_CREATE_TEMP case in
> pg_namespace_aclmask_ext()

I am having trouble finding an actual problem with ACL_CREATE_TEMP.
search_path ACL checks are normally bypassed for the temp namespace, so
it can only happen when the actual temp namespace name (e.g.
pg_temp_NNN) is explicitly included. In that case, the mask is
ACL_USAGE, so the two branches in pg_namespace_aclmask_ext() are
equivalent, right?
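
For reference, the branch in question looks roughly like this (my
paraphrase of pg_namespace_aclmask_ext(), abridged from memory -- see
aclchk.c for the authoritative version):

    if (isTempNamespace(nsp_oid))
    {
        /* TEMP privilege on the database => all regular schema rights */
        if (object_aclcheck(DatabaseRelationId, MyDatabaseId, roleid,
                            ACL_CREATE_TEMP) == ACLCHECK_OK)
            return mask & ACL_ALL_RIGHTS_SCHEMA;
        else
            return mask & ACL_USAGE;
    }

With mask == ACL_USAGE, both returns reduce to ACL_USAGE, so the result
doesn't depend on the pg_database lookup in that case.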

This question is not terribly important for the fix, because
invalidating for each pg_database change is still necessary as you
point out in here:

>  and affects ROLE_PG_DATABASE_OWNER membership.)

Another good catch, thank you.

Patch attached. Contains a bit of cleanup and is intended for 17+18.

Regards,
Jeff Davis

From 1b24d08302e268370ebb14e732f4d5f1f1407726 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Mon, 8 Jul 2024 15:52:50 -0700
Subject: [PATCH v1] Fix missing invalidations for search_path cache.

Reported-by: Noah Misch
Discussion: https://postgr.es/m/20240630223047.1f.nmi...@google.com
Backpatch-through: 17
---
 src/backend/catalog/namespace.c | 28 +---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index a2510cf80c6..43b707699d7 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -230,7 +230,7 @@ static void AccessTempTableNamespace(bool force);
 static void InitTempTableNamespace(void);
 static void RemoveTempRelations(Oid tempNamespaceId);
 static void RemoveTempRelationsCallback(int code, Datum arg);
-static void NamespaceCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void InvalidationCallback(Datum arg, int cacheid, uint32 hashvalue);
 static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 		   bool include_out_arguments, int pronargs,
 		   int **argnumbers);
@@ -4749,15 +4749,29 @@ InitializeSearchPath(void)
 
 		/*
 		 * In normal mode, arrange for a callback on any syscache invalidation
-		 * of pg_namespace or pg_authid rows. (Changing a role name may affect
-		 * the meaning of the special string $user.)
+		 * that will affect the search_path cache.
 		 */
+
+		/* namespace name or ACLs may have changed */
 		CacheRegisterSyscacheCallback(NAMESPACEOID,
-	  NamespaceCallback,
+	  InvalidationCallback,
 	  (Datum) 0);
+
+		/* role name may affect the meaning of "$user" */
 		CacheRegisterSyscacheCallback(AUTHOID,
-	  NamespaceCallback,
+	  InvalidationCallback,
+	  (Datum) 0);
+
+		/* role membership may affect ACLs */
+		CacheRegisterSyscacheCallback(AUTHMEMROLEMEM,
+	  InvalidationCallback,
 	  (Datum) 0);
+
+		/* database owner may affect ACLs */
+		CacheRegisterSyscacheCallback(DATABASEOID,
+	  InvalidationCallback,
+	  (Datum) 0);
+
 		/* Force search path to be recomputed on next use */
 		baseSearchPathValid = false;
 		searchPathCacheValid = false;
@@ -4765,11 +4779,11 @@ InitializeSearchPath(void)
 }
 
 /*
- * NamespaceCallback
+ * InvalidationCallback
  *		Syscache inval callback function
  */
 static void
-NamespaceCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidationCallback(Datum arg, int cacheid, uint32 hashvalue)
 {
 	/*
 	 * Force search path to be recomputed on next use, also invalidating the
-- 
2.34.1



Re: Faster "SET search_path"

2024-06-30 Thread Noah Misch
On Tue, Nov 14, 2023 at 08:13:25PM -0800, Jeff Davis wrote:
> On Thu, 2023-10-19 at 19:01 -0700, Jeff Davis wrote:
> > 0003: Cache for recomputeNamespacePath.
> 
> Committed

Commit f26c236 wrote:
> Add cache for recomputeNamespacePath().
> 
> When search_path is changed to something that was previously set, and
> no invalidation happened in between, use the cached list of namespace
> OIDs rather than recomputing them. This avoids syscache lookups and
> ACL checks.

> --- a/src/backend/catalog/namespace.c
> +++ b/src/backend/catalog/namespace.c

> @@ -109,11 +110,13 @@
>   * activeSearchPath is always the actually active path; it points to
>   * to baseSearchPath which is the list derived from namespace_search_path.
>   *
> - * If baseSearchPathValid is false, then baseSearchPath (and other
> - * derived variables) need to be recomputed from namespace_search_path.
> - * We mark it invalid upon an assignment to namespace_search_path or receipt
> - * of a syscache invalidation event for pg_namespace.  The recomputation
> - * is done during the next lookup attempt.
> + * If baseSearchPathValid is false, then baseSearchPath (and other derived
> + * variables) need to be recomputed from namespace_search_path, or retrieved
> + * from the search path cache if there haven't been any syscache
> + * invalidations.  We mark it invalid upon an assignment to
> + * namespace_search_path or receipt of a syscache invalidation event for
> + * pg_namespace or pg_authid.  The recomputation is done during the next

You're caching the result of object_aclcheck(NamespaceRelationId, ...), so
pg_auth_members changes and pg_database changes also need to invalidate this
cache.  (pg_database affects the ACL_CREATE_TEMP case in
pg_namespace_aclmask_ext() and affects ROLE_PG_DATABASE_OWNER membership.)




Re: Faster "SET search_path"

2023-12-05 Thread Jeff Davis
On Mon, 2023-11-20 at 17:13 -0800, Jeff Davis wrote:
> Will commit 0005 soon.

Committed.

> I also attached a trivial 0006 patch that uses SH_STORE_HASH. I wasn't
> able to show much benefit, though, even when there's a bucket
> collision. Perhaps there just aren't enough elements to matter -- I
> suppose there would be a benefit if there are lots of unique
> search_path strings, but that doesn't seem very plausible to me. If
> someone thinks it's worth committing, then I will, but I don't see
> any
> real upside or downside.

I tried again by forcing a hash table with ~25 entries and 13
collisions, and even then, SH_STORE_HASH didn't make a difference in my
test. Maybe a microbenchmark would show a difference, but I didn't see
much reason to commit 0006. (There's also no downside, so I was tempted
to commit it just so I wouldn't have to put more thought into whether
it's a problem or not.)

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-11-20 Thread Jeff Davis
On Thu, 2023-11-16 at 16:46 -0800, Jeff Davis wrote:
> While I considered OOM during hash key initialization, I missed some
> other potential out-of-memory hazards. Attached a fixup patch 0003,
> which re-introduces one list copy but it simplifies things
> substantially in addition to being safer around OOM conditions.

Committed 0003 fixup.

> > > 0004: Use the same cache to optimize check_search_path().

Committed 0004.

> > > 0005: Optimize cache for repeated lookups of the same value.

Will commit 0005 soon.

I also attached a trivial 0006 patch that uses SH_STORE_HASH. I wasn't
able to show much benefit, though, even when there's a bucket
collision. Perhaps there just aren't enough elements to matter -- I
suppose there would be a benefit if there are lots of unique
search_path strings, but that doesn't seem very plausible to me. If
someone thinks it's worth committing, then I will, but I don't see any
real upside or downside.

Regards,
Jeff Davis

From 5f41c0ecc602dd183b7f6e2f23cd28c9338b3c5b Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Sun, 19 Nov 2023 14:47:04 -0800
Subject: [PATCH v10 2/2] Use SH_STORE_HASH for search_path cache.

Does not change performance in expected cases, but makes performance
more resilient in case of bucket collisions.

Discussion: https://postgr.es/m/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
Discussion: https://postgr.es/m/27a7a289d5b8f42e1b1e79b1bcaeef3a40583bd2.ca...@j-davis.com
---
 src/backend/catalog/namespace.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 37a69e9023..0b6e0d711c 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -171,11 +171,10 @@ typedef struct SearchPathCacheEntry
 	List	   *oidlist;		/* namespace OIDs that pass ACL checks */
 	List	   *finalPath;		/* cached final computed search path */
 	Oid			firstNS;		/* first explicitly-listed namespace */
+	uint32		hash;			/* needed for simplehash */
 	bool		temp_missing;
 	bool		forceRecompute; /* force recompute of finalPath */
-
-	/* needed for simplehash */
-	char		status;
+	char		status;			/* needed for simplehash */
 }			SearchPathCacheEntry;
 
 /*
@@ -270,6 +269,8 @@ spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
 #define SH_EQUAL(tb, a, b)		spcachekey_equal(a, b)
 #define SH_SCOPE		static inline
 #define SH_DECLARE
+#define SH_GET_HASH(tb, a)		a->hash
+#define SH_STORE_HASH
 #define SH_DEFINE
 #include "lib/simplehash.h"
 
-- 
2.34.1

From 46e09b225217bef79a57cc4f8450ed19be8f21ba Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 19 Oct 2023 15:48:16 -0700
Subject: [PATCH v10 1/2] Optimize SearchPathCache by saving the last entry.

Repeated lookups are common, so it's worth it to check the last entry
before doing another hash lookup.

Discussion: https://postgr.es/m/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
---
 src/backend/catalog/namespace.c | 88 +
 1 file changed, 57 insertions(+), 31 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..37a69e9023 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -241,7 +241,8 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
  *
  * The search path cache is based on a wrapper around a simplehash hash table
  * (nsphash, defined below). The spcache wrapper deals with OOM while trying
- * to initialize a key, and also offers a more convenient API.
+ * to initialize a key, optimizes repeated lookups of the same key, and also
+ * offers a more convenient API.
  */
 
 static inline uint32
@@ -281,6 +282,7 @@ spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
 #define SPCACHE_RESET_THRESHOLD		256
 
 static nsphash_hash * SearchPathCache = NULL;
+static SearchPathCacheEntry * LastSearchPathCacheEntry = NULL;
 
 /*
  * Create or reset search_path cache as necessary.
@@ -295,6 +297,7 @@ spcache_init(void)
 		return;
 
 	MemoryContextReset(SearchPathCacheContext);
+	LastSearchPathCacheEntry = NULL;
 	/* arbitrary initial starting size of 16 elements */
 	SearchPathCache = nsphash_create(SearchPathCacheContext, 16, NULL);
 	searchPathCacheValid = true;
@@ -307,12 +310,25 @@ spcache_init(void)
 static SearchPathCacheEntry *
 spcache_lookup(const char *searchPath, Oid roleid)
 {
-	SearchPathCacheKey cachekey = {
-		.searchPath = searchPath,
-		.roleid = roleid
-	};
+	if (LastSearchPathCacheEntry &&
+		LastSearchPathCacheEntry->key.roleid == roleid &&
+		strcmp(LastSearchPathCacheEntry->key.searchPath, searchPath) == 0)
+	{
+		return LastSearchPathCacheEntry;
+	}
+	else
+	{
+		SearchPathCacheEntry *entry;
+		SearchPathCacheKey cachekey = {
+			.searchPath = searchPath,
+			.roleid = roleid
+		};
+
+		entry = nsphash_lookup(SearchPathCache, cachekey);
 
-	return nsphash_lookup(SearchPathCache, cachekey);
+		LastSearchPath

Re: Faster "SET search_path"

2023-11-16 Thread Jeff Davis
On Tue, 2023-11-14 at 20:13 -0800, Jeff Davis wrote:
> On Thu, 2023-10-19 at 19:01 -0700, Jeff Davis wrote:
> > 0003: Cache for recomputeNamespacePath.
> 
> Committed with some further simplification around the OOM handling.

While I considered OOM during hash key initialization, I missed some
other potential out-of-memory hazards. Attached a fixup patch 0003,
which re-introduces one list copy but it simplifies things
substantially in addition to being safer around OOM conditions.

> > 0004: Use the same cache to optimize check_search_path().
> > 0005: Optimize cache for repeated lookups of the same value.

Also attached new versions of these patches.

Regards,
Jeff Davis

From 6ed0785c29a43ccbd36aa3f9bd3be751131a6bf2 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 19 Oct 2023 15:48:16 -0700
Subject: [PATCH v9 5/5] Optimize SearchPathCache by saving the last entry.

Repeated lookups are common, so it's worth it to check the last entry
before doing another hash lookup.

Discussion: https://postgr.es/m/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
---
 src/backend/catalog/namespace.c | 88 +
 1 file changed, 57 insertions(+), 31 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 0785f2b797..f540da7ebc 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -241,7 +241,8 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
  *
  * The search path cache is based on a wrapper around a simplehash hash table
  * (nsphash, defined below). The spcache wrapper deals with OOM while trying
- * to initialize a key, and also offers a more convenient API.
+ * to initialize a key, optimizes repeated lookups of the same key, and also
+ * offers a more convenient API.
  */
 
 static inline uint32
@@ -281,6 +282,7 @@ spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
 #define SPCACHE_RESET_THRESHOLD		256
 
 static nsphash_hash * SearchPathCache = NULL;
+static SearchPathCacheEntry * LastSearchPathCacheEntry = NULL;
 
 /*
  * Create or reset search_path cache as necessary.
@@ -295,6 +297,7 @@ spcache_init(void)
 			return;
 
 	MemoryContextReset(SearchPathCacheContext);
+	LastSearchPathCacheEntry = NULL;
 	/* arbitrary initial starting size of 16 elements */
 	SearchPathCache = nsphash_create(SearchPathCacheContext, 16, NULL);
 	searchPathCacheValid = true;
@@ -307,12 +310,25 @@ spcache_init(void)
 static SearchPathCacheEntry *
 spcache_lookup(const char *searchPath, Oid roleid)
 {
-	SearchPathCacheKey cachekey = {
-		.searchPath = searchPath,
-		.roleid = roleid
-	};
+	if (LastSearchPathCacheEntry &&
+		LastSearchPathCacheEntry->key.roleid == roleid &&
+		strcmp(LastSearchPathCacheEntry->key.searchPath, searchPath) == 0)
+	{
+		return LastSearchPathCacheEntry;
+	}
+	else
+	{
+		SearchPathCacheEntry *entry;
+		SearchPathCacheKey cachekey = {
+			.searchPath = searchPath,
+			.roleid = roleid
+		};
+
+		entry = nsphash_lookup(SearchPathCache, cachekey);
 
-	return nsphash_lookup(SearchPathCache, cachekey);
+		LastSearchPathCacheEntry = entry;
+		return entry;
+	}
 }
 
 /*
@@ -324,35 +340,45 @@ spcache_lookup(const char *searchPath, Oid roleid)
 static SearchPathCacheEntry *
 spcache_insert(const char *searchPath, Oid roleid)
 {
-	SearchPathCacheEntry *entry;
-	SearchPathCacheKey cachekey = {
-		.searchPath = searchPath,
-		.roleid = roleid
-	};
-
-	/*
-	 * searchPath is not saved in SearchPathCacheContext. First perform a
-	 * lookup, and copy searchPath only if we need to create a new entry.
-	 */
-	entry = nsphash_lookup(SearchPathCache, cachekey);
-
-	if (!entry)
+	if (LastSearchPathCacheEntry &&
+		LastSearchPathCacheEntry->key.roleid == roleid &&
+		strcmp(LastSearchPathCacheEntry->key.searchPath, searchPath) == 0)
 	{
-		bool		found;
+		return LastSearchPathCacheEntry;
+	}
+	else
+	{
+		SearchPathCacheEntry *entry;
+		SearchPathCacheKey cachekey = {
+			.searchPath = searchPath,
+			.roleid = roleid
+		};
 
-		cachekey.searchPath = MemoryContextStrdup(SearchPathCacheContext, searchPath);
-		entry = nsphash_insert(SearchPathCache, cachekey, &found);
-		Assert(!found);
+		/*
+		 * searchPath is not saved in SearchPathCacheContext. First perform a
+		 * lookup, and copy searchPath only if we need to create a new entry.
+		 */
+		entry = nsphash_lookup(SearchPathCache, cachekey);
 
-		entry->oidlist = NIL;
-		entry->finalPath = NIL;
-		entry->firstNS = InvalidOid;
-		entry->temp_missing = false;
-		entry->forceRecompute = false;
-		/* do not touch entry->status, used by simplehash */
-	}
+		if (!entry)
+		{
+			bool		found;
+
+			cachekey.searchPath = MemoryContextStrdup(SearchPathCacheContext, searchPath);
+			entry = nsphash_insert(SearchPathCache, cachekey, &found);
+			Assert(!found);
+
+			entry->oidlist = NIL;
+			entry->finalPath = NIL;
+			entry->firstNS = InvalidOid;
+			entry->temp_missing = false;
+			entry->forceRecompute = false;
+			/* do not 

Re: Faster "SET search_path"

2023-11-14 Thread Jeff Davis
On Thu, 2023-10-19 at 19:01 -0700, Jeff Davis wrote:
> 0003: Cache for recomputeNamespacePath.

Committed with some further simplification around the OOM handling.
Instead of using MCXT_ALLOC_NO_OOM, it just temporarily sets the cache
invalid while copying the string, and sets it valid again before
returning. That way, if an OOM occurs during the string copy, the cache
will be reset.
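
In pattern form, it's essentially this (a minimal sketch of the idea,
not the committed code verbatim):

    /*
     * Poison the cache before allocating: if MemoryContextStrdup()
     * throws on OOM, searchPathCacheValid stays false and the next
     * lookup rebuilds the cache from scratch.
     */
    searchPathCacheValid = false;
    cachekey.searchPath = MemoryContextStrdup(SearchPathCacheContext,
                                              searchPath);
    entry = nsphash_insert(SearchPathCache, cachekey, &found);
    /* ... initialize the new entry's fields ... */
    searchPathCacheValid = true;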

Also some minor cleanup.

> 0004: Use the same cache to optimize check_search_path().
> 0005: Optimize cache for repeated lookups of the same value.

I intend to commit these fairly soon, as well.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-10-19 Thread Jeff Davis
On Fri, 2023-09-15 at 11:31 -0700, Jeff Davis wrote:
> On Tue, 2023-09-12 at 13:55 -0700, Jeff Davis wrote:
> > On Mon, 2023-08-07 at 15:39 -0700, Nathan Bossart wrote:
> > > 0003 is looking pretty good, too, but I think we
> > > should get some more eyes on it, given the complexity.
> > 
> > Attached rebased version of 0003.
> 
> Is someone else planning to look at 0003, or should I just proceed? It
> seems to be clearly wanted, and I think it's better to get it in this
> 'fest than to wait.

Attaching a new version, as well as some additional optimizations.

Changes:

* I separated it into more small functions, and generally refactored
quite a bit trying to make it easier to review.

* The new version is more careful about what might happen during an
OOM, or in weird cases such as a huge number of distinct search_path
strings.

0003: Cache for recomputeNamespacePath.
0004: Use the same cache to optimize check_search_path().
0005: Optimize cache for repeated lookups of the same value.

Applying the same tests as described in the first message[1], the new
numbers are:

  baseline:             4.4s
  test query:
    without patch:     12.3s
    0003:               8.8s
    0003,0004:          7.4s
    0003,0004,0005:     7.0s

This brings the slowdown from 180% on master down to about 60%. Still
not where we want to be exactly, but more tolerable.

The profile shows a few more areas worth looking at, so I suppose a bit
more effort could bring it down further. find_option(), for instance,
is annoyingly slow because it does case folding.

Regards,
Jeff Davis

[1]
https://www.postgresql.org/message-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
From 955cce952e8fbc8d117d4df1876e56feaec1d944 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Wed, 26 Jul 2023 12:55:09 -0700
Subject: [PATCH v7 3/6] Add cache for recomputeNamespacePath().

When search_path is changed to something that was previously set, and
no invalidation happened in between, use the cached list of namespace
OIDs rather than recomputing them. This avoids syscache lookups and
ACL checks.

Important when the search_path changes frequently, such as when set in
proconfig.

Discussion: https://postgr.es/m/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
---
 src/backend/catalog/namespace.c | 440 ++--
 1 file changed, 365 insertions(+), 75 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index ff1bfae1a3..258c97b590 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,6 +41,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
+#include "common/hashfn.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -109,11 +110,12 @@
  * activeSearchPath is always the actually active path; it points to
  * to baseSearchPath which is the list derived from namespace_search_path.
  *
- * If baseSearchPathValid is false, then baseSearchPath (and other
- * derived variables) need to be recomputed from namespace_search_path.
- * We mark it invalid upon an assignment to namespace_search_path or receipt
- * of a syscache invalidation event for pg_namespace.  The recomputation
- * is done during the next lookup attempt.
+ * If baseSearchPathValid is false, then baseSearchPath (and other derived
+ * variables) need to be recomputed from namespace_search_path, or retrieved
+ * from the search path cache if there haven't been any syscache
+ * invalidations.  We mark it invalid upon an assignment to
+ * namespace_search_path or receipt of a syscache invalidation event for
+ * pg_namespace.  The recomputation is done during the next lookup attempt.
  *
  * Any namespaces mentioned in namespace_search_path that are not readable
  * by the current user ID are simply left out of baseSearchPath; so
@@ -153,6 +155,27 @@ static Oid	namespaceUser = InvalidOid;
 
 /* The above four values are valid only if baseSearchPathValid */
 static bool baseSearchPathValid = true;
+static bool searchPathCacheValid = false;
+static MemoryContext SearchPathCacheContext = NULL;
+
+typedef struct SearchPathCacheKey
+{
+	const char	*searchPath;
+	Oid			 roleid;
+} SearchPathCacheKey;
+
+typedef struct SearchPathCacheEntry
+{
+	SearchPathCacheKey	 key;
+	List*oidlist;	/* namespace OIDs that pass ACL checks */
+	List*finalPath;	/* cached final computed search path */
+	Oid	 firstNS;	/* first explicitly-listed namespace */
+	bool temp_missing;
+	bool forceRecompute; /* force recompute of finalPath */
+
+	/* needed for simplehash */
+	char status;
+} SearchPathCacheEntry;
 
 /*
  * myTempNamespace is InvalidOid until and unless a TEMP namespace is set up
@@ -206,6 +229,143 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 		   bool include_out_arguments, int pronargs,
 		   int **argnumbers);
 
+/*
+ * Recomputing the n

Re: Faster "SET search_path"

2023-09-15 Thread Jeff Davis
On Tue, 2023-09-12 at 13:55 -0700, Jeff Davis wrote:
> On Mon, 2023-08-07 at 15:39 -0700, Nathan Bossart wrote:
> > 0003 is looking pretty good, too, but I think we
> > should get some more eyes on it, given the complexity.
> 
> Attached rebased version of 0003.

Is someone else planning to look at 0003, or should I just proceed? It
seems to be clearly wanted, and I think it's better to get it in this
'fest than to wait.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-09-12 Thread Jeff Davis
On Mon, 2023-08-07 at 15:39 -0700, Nathan Bossart wrote:
> 0003 is looking pretty good, too, but I think we
> should get some more eyes on it, given the complexity.

Attached rebased version of 0003.

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From f5b055aea1bf08928de1bffcfd7b202e28847595 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Wed, 26 Jul 2023 12:55:09 -0700
Subject: [PATCH v5 3/3] Add cache for recomputeNamespacePath().

When search_path is changed to something that was previously set, and
no invalidation happened in between, use the cached list of namespace
OIDs rather than recomputing them. This avoids syscache lookups and
ACL checks.

Important when the search_path changes frequently, such as when set in
proconfig.

Discussion: https://postgr.es/m/04c8592dbd694e4114a3ed87139a7a04e4363030.camel%40j-davis.com
---
 src/backend/catalog/namespace.c | 349 ++--
 1 file changed, 286 insertions(+), 63 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 4ceae038ec..8b97b3b529 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,6 +41,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
+#include "common/hashfn.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -109,11 +110,12 @@
  * activeSearchPath is always the actually active path; it points to
  * to baseSearchPath which is the list derived from namespace_search_path.
  *
- * If baseSearchPathValid is false, then baseSearchPath (and other
- * derived variables) need to be recomputed from namespace_search_path.
- * We mark it invalid upon an assignment to namespace_search_path or receipt
- * of a syscache invalidation event for pg_namespace.  The recomputation
- * is done during the next lookup attempt.
+ * If baseSearchPathValid is false, then baseSearchPath (and other derived
+ * variables) need to be recomputed from namespace_search_path, or retrieved
+ * from the search path cache if there haven't been any syscache
+ * invalidations.  We mark it invalid upon an assignment to
+ * namespace_search_path or receipt of a syscache invalidation event for
+ * pg_namespace.  The recomputation is done during the next lookup attempt.
  *
  * Any namespaces mentioned in namespace_search_path that are not readable
  * by the current user ID are simply left out of baseSearchPath; so
@@ -153,6 +155,35 @@ static Oid	namespaceUser = InvalidOid;
 
 /* The above four values are valid only if baseSearchPathValid */
 static bool baseSearchPathValid = true;
+static bool searchPathCacheValid = false;
+static MemoryContext SearchPathCacheContext = NULL;
+
+typedef struct SearchPathCacheKey
+{
+	const char	*searchPath;
+	Oid			 roleid;
+} SearchPathCacheKey;
+
+typedef struct SearchPathCacheEntry
+{
+	SearchPathCacheKey	*key;
+	List*oidlist;	/* namespace OIDs that pass ACL checks */
+	List*finalPath;	/* cached final computed search path */
+	Oid	 firstNS;	/* first explicitly-listed namespace */
+	bool temp_missing;
+
+	/*
+	 * If true, some namespaces were previously filtered out of finalPath by a
+	 * namespace search hook. The next time it's read from the cache, we need
+	 * to recreate finalPath from the oidlist.
+	 */
+	bool hook_filtered;
+
+	/* needed for simplehash */
+	char status;
+} SearchPathCacheEntry;
+
+static SearchPathCacheEntry *CurrentSearchPathCacheEntry = NULL;
 
 /*
  * myTempNamespace is InvalidOid until and unless a TEMP namespace is set up
@@ -193,6 +224,45 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
 		   bool include_out_arguments, int pronargs,
 		   int **argnumbers);
 
+/*
+ * Recomputing the namespace path can be costly when done frequently, such as
+ * when a function has search_path set in proconfig. Add a cache that can be
+ * used when the search_path changes to a value that was previously set, and
+ * no invalidation intervened.
+ *
+ * The cache is a simple hash table in its own memory context, with a key of
+ * search_path and role ID.
+ */
+
+static inline uint32
+nspcachekey_hash(SearchPathCacheKey *key)
+{
+	const unsigned char	*bytes = (const unsigned char *)key->searchPath;
+	int	 blen  = strlen(key->searchPath);
+
+	return hash_combine(hash_bytes(bytes, blen),
+		hash_uint32(key->roleid));
+}
+
+static inline bool
+nspcachekey_equal(SearchPathCacheKey *a, SearchPathCacheKey *b)
+{
+	return a->roleid == b->roleid &&
+		strcmp(a->searchPath, b->searchPath) == 0;
+}
+
+#define SH_PREFIX		nsphash
+#define SH_ELEMENT_TYPE	SearchPathCacheEntry
+#define SH_KEY_TYPE		SearchPathCacheKey *
+#define	SH_KEY			key
+#define SH_HASH_KEY(tb, key)   	nspcachekey_hash(key)
+#define SH_EQUAL(tb, a, b)		nspcachekey_equal(a, b)
+#define	SH_SCOPE		static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static nsphash_hash *SearchPathCache = NULL;
 
 /*
 

Re: Faster "SET search_path"

2023-08-22 Thread Jeff Davis
On Wed, 2023-08-16 at 15:09 -0700, Jeff Davis wrote:
> To bring the overhead closer to zero we need to somehow avoid repeating
> so much work in guc.c, though. If we don't go around it, another
> approach would be to separate GUC setting into two phases: one that
> does the checks, and one that performs the final change. All the real
> work (hash lookup, guc_strdup, and check_search_path) is done in the
> "check" phase.
> 
> It's a bit painful to reorganize the code in guc.c, though, so that
> might need to happen in a few parts and will depend on how great the
> demand is.

I looked into this, and one problem is that there are two different
memory allocators at work. In guc.c, the new value is guc_strdup'd, and
needs to be saved until it replaces the old value. But if the caller
(in fmgr.c) saves the state in fmgr_security_definer_cache, it would be
saving a guc_strdup'd value, and we'd need to be careful about freeing
that if (and only if) the final set step of the GUC doesn't happen. Or,
we'd need to somehow use pstrdup() instead, and somehow tell guc.c not
to guc_free() it.

Those are obviously solvable, but I don't see a clean way to do so.
Perhaps it's still better than circumventing guc.c entirely (because
that approach probably suffers from the same problem, in addition to
the problem you raised), but I'm not sure if it's worth it to take on
this level of complexity. Suggestions welcome.

Right now I'm still intending to get the cache in place, which doesn't
have these downsides and provides a nice speedup.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-08-16 Thread Jeff Davis
On Tue, 2023-08-15 at 13:04 -0400, Robert Haas wrote:
> I suspect that dodging the GUC stack machinery is not a very good
> idea. The timing of when TRY/CATCH blocks are called is different from
> when subtransactions are aborted, and that distinction has messed me
> up more than once when trying to code various things.

...

> I can't say there isn't a way to make something like what you're
> talking about here work, but I bet it will be difficult to get all of
> the corner cases right, and I suspect that trying to speed up the
> existing mechanism is a better plan than trying to go around it.

The SearchPathCache that I implemented (0003) is an example of speeding
up the existing mechanism, and it already offers a huge speedup. I'll
focus on getting that in first. That patch has had some review and I'm
tempted to commit it soon, but Nathan has requested another set of eyes
on it.

To bring the overhead closer to zero we need to somehow avoid repeating
so much work in guc.c, though. If we don't go around it, another
approach would be to separate GUC setting into two phases: one that
does the checks, and one that performs the final change. All the real
work (hash lookup, guc_strdup, and check_search_path) is done in the
"check" phase.

It's a bit painful to reorganize the code in guc.c, though, so that
might need to happen in a few parts and will depend on how great the
demand is.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-08-15 Thread Robert Haas
On Mon, Aug 7, 2023 at 7:24 PM Jeff Davis  wrote:
> I might just avoid guc.c entirely and directly set
> namespace_search_path and baseSearchPathValid=false. The main thing we
> lose there is the GUC stack (AtEOXact_GUC()), but there's already a
> PG_TRY/PG_FINALLY block in fmgr_security_definer, so we can use that to
> change it back safely. (I think? I need to convince myself that it's
> safe to skip the work in guc.c, at least for the special case of
> search_path, and that it won't be too fragile.)

I suspect that dodging the GUC stack machinery is not a very good
idea. The timing of when TRY/CATCH blocks are called is different from
when subtransactions are aborted, and that distinction has messed me
up more than once when trying to code various things. Specifically,
changes that happen in a CATCH block happen before we actually reach
Abort(Sub)Transaction, which sort of makes it sound like you'd be
reverting back to the old value too early. The GUC stack is also
pretty complicated because of the SET/SAVE and LOCAL/non-LOCAL stuff.
I can't say there isn't a way to make something like what you're
talking about here work, but I bet it will be difficult to get all of
the corner cases right, and I suspect that trying to speed up the
existing mechanism is a better plan than trying to go around it.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Faster "SET search_path"

2023-08-07 Thread Jeff Davis
On Mon, 2023-08-07 at 15:39 -0700, Nathan Bossart wrote:
> 0001 and 0002 LGTM.

I'll probably go ahead with 0001 soon. Simple and effective.

But for 0002, I was thinking about trying to optimize so
check_search_path() only gets called once at the beginning, rather than
for each function invocation. It's a bit of a hack right now, but it
should be correct because it's just doing a syntactic validity check
and that doesn't depend on catalog contents. If I can find a
reasonably-nice way to do this, then there's no reason to optimize the
check itself, and we wouldn't have to maintain that separate parsing
code.

I might just avoid guc.c entirely and directly set
namespace_search_path and baseSearchPathValid=false. The main thing we
lose there is the GUC stack (AtEOXact_GUC()), but there's already a
PG_TRY/PG_FINALLY block in fmgr_security_definer, so we can use that to
change it back safely. (I think? I need to convince myself that it's
safe to skip the work in guc.c, at least for the special case of
search_path, and that it won't be too fragile.)
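
Roughly, the shape I have in mind is the following (a sketch only;
fcache->searchPath is a hypothetical field here, and whether skipping
guc.c this way is actually safe is exactly what I still need to
convince myself of):

    char   *save_path = namespace_search_path;

    namespace_search_path = fcache->searchPath;  /* hypothetical */
    baseSearchPathValid = false;

    PG_TRY();
    {
        result = FunctionCallInvoke(fcinfo);
    }
    PG_FINALLY();
    {
        /* restore even on error, via the wrapper's existing cleanup */
        namespace_search_path = save_path;
        baseSearchPathValid = false;
    }
    PG_END_TRY();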

I started down this road and it gets even closer in performance to the
plain function call. I don't know if we'll ever call it zero cost, but
safety has a cost sometimes.

>   0003 is looking pretty good, too, but I think we
> should get some more eyes on it, given the complexity.

I'm happy with 0003 and I'm fairly sure we want it, but I agree that
another set of eyes would be nice so I'll give it a bit more time. I
could also improve the testing; any suggestions here are welcome.

> I wonder if it'd be faster to downcase first and then pass it to
> hash_bytes().  This would require modifying the key, which might be a
> deal-breaker, but maybe there's some way to improve find_option() for
> all GUCs.

I thought about that and I don't think modifying the argument is a good
API.

For the match function, I added a fast path for strcmp()==0 with a
fallback to case folding (on the assumption that the case is consistent
in most cases, especially built-in ones). For the hash function, I used
a stack buffer and downcased the first K bytes into that, then did
hash_bytes(buff, K). Those improved things a bit but it wasn't an
exciting amount so I didn't post it. I made a patch to port guc_hashtab
to simplehash because it seems like a better fit than hsearch.h; but
again, I didn't see exciting numbers from doing so, so I didn't post
it.
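
The hash-side experiment looked something like this (a reconstructed
sketch from memory, not the exact patch I tried):

    #define GUC_NAME_HASH_PREFIX 64    /* fold and hash at most this many bytes */

    static uint32
    guc_name_hash_folded(const char *name)
    {
        char    buf[GUC_NAME_HASH_PREFIX];
        int     len = 0;

        /* downcase ASCII into a stack buffer, then hash it in one call */
        while (name[len] != '\0' && len < GUC_NAME_HASH_PREFIX)
        {
            char    ch = name[len];

            if (ch >= 'A' && ch <= 'Z')
                ch += 'a' - 'A';
            buf[len++] = ch;
        }
        return hash_bytes((const unsigned char *) buf, len);
    }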

Maybe we could use perfect hashing for the built-in GUCs, and fall back
to simplehash for the custom GUCs?

Regardless, I'm mostly interested in search_path because I want to get
to the point where we can recommend that users specify it for every
function that depends on it. A special case to facilitate that without
compromising (much) on performance seems reasonable to me.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-08-07 Thread Nathan Bossart
0001 and 0002 LGTM.  0003 is looking pretty good, too, but I think we
should get some more eyes on it, given the complexity.

On Mon, Aug 07, 2023 at 12:57:27PM -0700, Jeff Davis wrote:
> (Aside: part of the reason set_config_option() is slow is because of
> the lookup in guc_hashtab. That's slower than some other hash lookups
> because it does case-folding, which needs to be done in both the hash
> function and also the match function. The hash lookups in
> SearchPathCache are significantly faster. I also have a patch to change
> guc_hashtab to simplehash, but I didn't see much difference.)

I wonder if it'd be faster to downcase first and then pass it to
hash_bytes().  This would require modifying the key, which might be a
deal-breaker, but maybe there's some way to improve find_option() for all
GUCs.

> But in general I'd prefer to optimize cases that are going to work
> nicely by default for a lot of users (especially switching to a safe
> search path), without the need for obscure knowledge about the
> performance implications of the session search_path. And to the extent
> that we do optimize for the pre-existing search_path, I'd like to
> understand the inherent overhead of changing the search path versus
> incidental overhead.

+1

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Faster "SET search_path"

2023-08-07 Thread Jeff Davis
On Wed, 2023-08-02 at 01:14 -0400, Isaac Morland wrote:
> > > On Sat, 2023-07-29 at 12:44 -0400, Isaac Morland wrote:
> > > > Essentially, "just" observe efficiently (somehow) that no change is
> > > > needed, and skip changing it?

...

> Speaking as someone who uses a lot of stored procedures, many of
> which call one another, I definitely want this optimization.

If we try to avoid the set_config_option() entirely, we'd have to
introduce the invalidation logic into fmgr.c somehow, and there would
be a performance cliff for users who change their session's
search_path.

Instead, in v4 (attached) I introduced a fast path (patch 0004,
experimental) for setting search_path that uses the same low-level
invalidation logic in namespace.c, but avoids most of the overhead of
set_config_option().

(Aside: part of the reason set_config_option() is slow is because of
the lookup in guc_hashtab. That's slower than some other hash lookups
because it does case-folding, which needs to be done in both the hash
function and also the match function. The hash lookups in
SearchPathCache are significantly faster. I also have a patch to change
guc_hashtab to simplehash, but I didn't see much difference.)

New timings (with session's search_path set to a, b):

  inc()        plain function:         4.1s
  inc_secdef() SECURITY DEFINER:       4.3s
  inc_wm()     SET work_mem = '4GB':   6.3s
  inc_ab()     SET search_path = a, b: 5.4s
  inc_bc()     SET search_path = b, c: 5.6s

I reported the numbers a bit differently here. All numbers are using v4
of the patch. The plain function is a baseline; SECURITY DEFINER is for
measuring the overhead of the fmgr_security_definer wrapper; inc_ab()
is for setting the search_path to the same value it's already set to;
and inc_bc() is for setting the search_path to a new value.

There's still a difference between inc() (with no proconfigs) and
inc_ab(), so you can certainly argue that some or all of that can be
avoided if the search_path is unchanged. For instance, adding the new
GUC level causes AtEOXact_GUC to be run, and that does some work. Maybe
it's even possible to avoid the fmgr_security_definer wrapper entirely
(though it would be tricky to do so).

But in general I'd prefer to optimize cases that are going to work
nicely by default for a lot of users (especially switching to a safe
search path), without the need for obscure knowledge about the
performance implications of the session search_path. And to the extent
that we do optimize for the pre-existing search_path, I'd like to
understand the inherent overhead of changing the search path versus
incidental overhead.


-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From f45ee8c5ea6517907000d1d3566dcc766ca25732 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 27 Jul 2023 15:11:42 -0700
Subject: [PATCH v4 1/4] Transform proconfig for faster execution.

Store function config settings as a list of (name, value) pairs to
avoid the need to parse for each function invocation.
---
 src/backend/utils/fmgr/fmgr.c | 29 +++---
 src/backend/utils/misc/guc.c  | 45 ---
 src/include/utils/guc.h   |  2 ++
 3 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe0..6b2d7d7be3 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -612,7 +612,8 @@ struct fmgr_security_definer_cache
 {
 	FmgrInfo	flinfo;			/* lookup info for target function */
 	Oid			userid;			/* userid to set, or InvalidOid */
-	ArrayType  *proconfig;		/* GUC values to set, or NULL */
+	List	   *configNames;	/* GUC names to set, or NIL */
+	List	   *configValues;	/* GUC values to set, or NIL */
 	Datum		arg;			/* passthrough argument for plugin modules */
 };
 
@@ -634,6 +635,8 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 	FmgrInfo   *save_flinfo;
 	Oid			save_userid;
 	int			save_sec_context;
+	ListCell   *lc1;
+	ListCell   *lc2;
 	volatile int save_nestlevel;
 	PgStat_FunctionCallUsage fcusage;
 
@@ -666,8 +669,11 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 &isnull);
 		if (!isnull)
 		{
+			ArrayType *array;
 			oldcxt = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
-			fcache->proconfig = DatumGetArrayTypePCopy(datum);
+			array = DatumGetArrayTypeP(datum);
+			TransformGUCArray(array, &fcache->configNames,
+			  &fcache->configValues);
 			MemoryContextSwitchTo(oldcxt);
 		}
 
@@ -680,7 +686,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 
 	/* GetUserIdAndSecContext is cheap enough that no harm in a wasted call */
 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
-	if (fcache->proconfig)		/* Need a new GUC nesting level */
+	if (fcache->configNames != NIL)		/* Need a new GUC nesting level */
 		save_nestlevel = NewGUCNestLevel();
 	else
 		save_nestlevel = 0;		/* keep compiler quiet */
@@ -689,12 +695,17 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 		SetUserIdAndSecContext(fcache->

Re: Faster "SET search_path"

2023-08-02 Thread Jeff Davis
On Tue, 2023-08-01 at 21:52 -0700, Nathan Bossart wrote:
> I typically see this done with two separate lists and forboth().

Agreed, done.

> 
> Any reason not to use hash_combine() here?

Thank you, fixed.

> > I changed it to move the hook so that it's called after retrieving
> > from the cache.
...
> I think you are right.  This adds some complexity, but I don't have
> anything else to propose at the moment.

I reworked it a bit to avoid allocations in most cases, and it only
reprocesses the oidlist and runs the hooks when necessary (and even
then, still benefits from the cached OIDs that have already passed the
ACL checks).

Now I'm seeing timings around 7.1s, which is starting to look really
nice.

> 
> I'm not following why this logic was moved.

There was previously a comment:

"We want to detect the case where the effective value of the base
search path variables didn't change.  As long as we're doing so, we can
avoid copying the OID list unnecessarily."

But with v2 the copy had already happened by that point. In v3, there
is no copy at all.

Regards,
Jeff Davis


From d9a3d4364731a152096d4a8c8cf6a81c1e86f8ab Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 27 Jul 2023 15:11:42 -0700
Subject: [PATCH v3 1/3] Transform proconfig for faster execution.

Store function config settings as a list of (name, value) pairs to
avoid the need to parse for each function invocation.
---
 src/backend/utils/fmgr/fmgr.c | 29 +++---
 src/backend/utils/misc/guc.c  | 45 ---
 src/include/utils/guc.h   |  2 ++
 3 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe0..6b2d7d7be3 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -612,7 +612,8 @@ struct fmgr_security_definer_cache
 {
 	FmgrInfo	flinfo;			/* lookup info for target function */
 	Oid			userid;			/* userid to set, or InvalidOid */
-	ArrayType  *proconfig;		/* GUC values to set, or NULL */
+	List	   *configNames;	/* GUC names to set, or NIL */
+	List	   *configValues;	/* GUC values to set, or NIL */
 	Datum		arg;			/* passthrough argument for plugin modules */
 };
 
@@ -634,6 +635,8 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 	FmgrInfo   *save_flinfo;
 	Oid			save_userid;
 	int			save_sec_context;
+	ListCell   *lc1;
+	ListCell   *lc2;
 	volatile int save_nestlevel;
 	PgStat_FunctionCallUsage fcusage;
 
@@ -666,8 +669,11 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 &isnull);
 		if (!isnull)
 		{
+			ArrayType *array;
 			oldcxt = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
-			fcache->proconfig = DatumGetArrayTypePCopy(datum);
+			array = DatumGetArrayTypeP(datum);
+			TransformGUCArray(array, &fcache->configNames,
+			  &fcache->configValues);
 			MemoryContextSwitchTo(oldcxt);
 		}
 
@@ -680,7 +686,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 
 	/* GetUserIdAndSecContext is cheap enough that no harm in a wasted call */
 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
-	if (fcache->proconfig)		/* Need a new GUC nesting level */
+	if (fcache->configNames != NIL)		/* Need a new GUC nesting level */
 		save_nestlevel = NewGUCNestLevel();
 	else
 		save_nestlevel = 0;		/* keep compiler quiet */
@@ -689,12 +695,17 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 		SetUserIdAndSecContext(fcache->userid,
 			   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);
 
-	if (fcache->proconfig)
+	forboth(lc1, fcache->configNames, lc2, fcache->configValues)
 	{
-		ProcessGUCArray(fcache->proconfig,
-		(superuser() ? PGC_SUSET : PGC_USERSET),
-		PGC_S_SESSION,
-		GUC_ACTION_SAVE);
+		GucContext	 context = superuser() ? PGC_SUSET : PGC_USERSET;
+		GucSource	 source	 = PGC_S_SESSION;
+		GucAction	 action	 = GUC_ACTION_SAVE;
+		char		*name	 = lfirst(lc1);
+		char		*value	 = lfirst(lc2);
+
+		(void) set_config_option(name, value,
+ context, source,
+ action, true, 0, false);
 	}
 
 	/* function manager hook */
@@ -737,7 +748,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 
 	fcinfo->flinfo = save_flinfo;
 
-	if (fcache->proconfig)
+	if (fcache->configNames != NIL)
 		AtEOXact_GUC(true, save_nestlevel);
 	if (OidIsValid(fcache->userid))
 		SetUserIdAndSecContext(save_userid, save_sec_context);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5308896c87..fa96b79192 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -6213,16 +6213,13 @@ ParseLongOption(const char *string, char **name, char **value)
 			*cp = '_';
 }
 
-
 /*
- * Handle options fetched from pg_db_role_setting.setconfig,
- * pg_proc.proconfig, etc.  Caller must specify proper context/source/action.
- *
- * The array parameter must be an array of TEXT (it must not be NULL).
+ * Transform array of GUC settings into a list of (name, value) pairs. The
+ * list is faster to process in cases where the settings must be applie

Re: Faster "SET search_path"

2023-08-01 Thread Jeff Davis
On Wed, 2023-08-02 at 01:14 -0400, Isaac Morland wrote:
> I don’t think the fact that an optimization might suddenly not work
> in a certain situation is a reason not to optimize. What would our
> query planner look like if we took that approach?

...

> Instead, we should try to find ways of making the performance more
> transparent. We already have some features for this, but maybe more
> can be done.

Exactly; for the planner we have EXPLAIN. We could try to expose GUC
switching costs that way.

Or maybe users can start thinking of search_path as a performance-
related setting to be careful with.

Or we could try harder to optimize the switching costs so that it's not
so bad. I just started and I already saw some nice speedups; with a few
of the easy fixes done, the remaining opportunities should be more
visible in the profiler.

Regards,
Jeff Davis




Re: Faster "SET search_path"

2023-08-01 Thread Jeff Davis
On Tue, 2023-08-01 at 22:07 -0700, Nathan Bossart wrote:
> I wonder if this is a good enough reason to _not_ proceed with this
> optimization.  At the moment, I'm on the fence about it.

I was wondering the same thing. It's something that could reasonably be
explained to users; it's not what I'd call "magical", it's just
avoiding an unnecessary SET. But I could certainly see it as a cause of
confusion: "why is this query faster for user foo than user bar?".

Another concern is that the default search_path is "$user, public" but
the safe search_path is "pg_catalog, pg_temp". So if we automatically
switch to the safe search path for functions in index expressions (as I
think we should do, at least ideally), then they'd be slow by default.
We'd need to start advising people to set their search_path to
"pg_catalog, pg_temp" to speed things up.

I'm not opposed to this optimization, but I'm not sure about it either.

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-08-01 Thread Isaac Morland
On Wed, 2 Aug 2023 at 01:07, Nathan Bossart 
wrote:

> On Mon, Jul 31, 2023 at 10:28:31PM -0700, Jeff Davis wrote:
> > On Sat, 2023-07-29 at 12:44 -0400, Isaac Morland wrote:
> >> Essentially, "just" observe efficiently (somehow) that no change is
> >> needed, and skip changing it?
> >
> > I gave this a try and it speeds things up some more.
> >
> > There might be a surprise factor with an optimization like that,
> > though. If someone becomes accustomed to the function running fast,
> > then changing the search_path in the caller could slow things down a
> > lot and it would be hard for the user to understand what happened.
>
> I wonder if this is a good enough reason to _not_ proceed with this
> optimization.  At the moment, I'm on the fence about it.
>

Speaking as someone who uses a lot of stored procedures, many of which call
one another, I definitely want this optimization.

I don’t think the fact that an optimization might suddenly not work in a
certain situation is a reason not to optimize. What would our query planner
look like if we took that approach? Many people regularly fix performance
problems by making inscrutable adjustments to queries, sometimes after
having accidentally ruined performance by modifying the query. Instead, we
should try to find ways of making the performance more transparent. We
already have some features for this, but maybe more can be done.


Re: Faster "SET search_path"

2023-08-01 Thread Nathan Bossart
On Mon, Jul 31, 2023 at 10:28:31PM -0700, Jeff Davis wrote:
> On Sat, 2023-07-29 at 12:44 -0400, Isaac Morland wrote:
>> Essentially, "just" observe efficiently (somehow) that no change is
>> needed, and skip changing it?
> 
> I gave this a try and it speeds things up some more.
> 
> There might be a surprise factor with an optimization like that,
> though. If someone becomes accustomed to the function running fast,
> then changing the search_path in the caller could slow things down a
> lot and it would be hard for the user to understand what happened.

I wonder if this is a good enough reason to _not_ proceed with this
optimization.  At the moment, I'm on the fence about it.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Faster "SET search_path"

2023-08-01 Thread Nathan Bossart
On Tue, Aug 01, 2023 at 04:59:33PM -0700, Jeff Davis wrote:
> + List  *pair  = lfirst(lc);
> + char  *name  = linitial(pair);
> + char  *value = lsecond(pair);

This is definitely a nitpick, but this List of Lists business feels strange
to me for some reason.  There might be some code that does this, but I
typically see this done with two separate lists and forboth().  I doubt it
makes terribly much difference in the end, and I really don't have a strong
opinion either way, but it seemed like something that would inevitably come
up in this thread.  Is there any particular reason you chose to do it this
way?

> SplitIdentifierString() modifies its input, and it doesn't seem like a
> net win in readability if the API sometimes modifies its input
> (requiring a copy in the caller) and sometimes does not. I'm open to
> suggestion about a refactor here, but I didn't see a clean way to do
> so.

That's fine by me.

> +static inline uint32
> +nspcachekey_hash(SearchPathCacheKey *key)
> +{
> + const unsigned char *bytes = (const unsigned char *)key->searchPath;
> + int  blen  = strlen(key->searchPath);
> +
> + return hash_bytes(bytes, blen) ^ hash_uint32(key->roleid);
> +}

Any reason not to use hash_combine() here?

> I changed it to move the hook so that it's called after retrieving from
> the cache. Most of the speedup is still there with no behavior change.
> It also means I had to move the deduplication to happen after
> retrieving from the cache, which doesn't seem great but in practice the
> search path list is not very long so I don't think it will make much
> difference. (Arguably, we can do the deduplication before caching, but
> I didn't see a big difference so I didn't want to split hairs on the
> semantics.)

I think you are right.  This adds some complexity, but I don't have
anything else to propose at the moment.

> - /* Now safe to assign to state variables. */
> - list_free(baseSearchPath);
> - baseSearchPath = newpath;
> - baseCreationNamespace = firstNS;
> - baseTempCreationPending = temp_missing;
>   }
>  
> + /* Now safe to assign to state variables. */
> + list_free(baseSearchPath);
> + baseSearchPath = finalPath;
> + baseCreationNamespace = firstNS;
> + baseTempCreationPending = temp_missing;

I'm not following why this logic was moved.

> New timings:
>
>   baseline:             4.2s
>   test query:
>     without patch:     13.1s
>     0001:              11.7s
>     0001+0002:         10.5s
>     0001+0002+0003:     8.1s

Nice.  It makes sense that 0003 would provide the most benefit.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Faster "SET search_path"

2023-08-01 Thread Jeff Davis
On Sat, 2023-07-29 at 21:51 -0700, Nathan Bossart wrote:
> On Sat, Jul 29, 2023 at 08:59:01AM -0700, Jeff Davis wrote:
> > 0001: Transform the settings in proconfig into a List for faster
> > processing. This is simple and speeds up any proconfig setting.
> 
> This looks straightforward.  It might be worth combining the common
> parts of ProcessGUCArray() and TransformGUCArray() if there is a clean
> way to do so.

I changed the former to call the latter to share code. A few extra
allocations in the ProcessGUCArray() path, but it doesn't look like
that would matter.

> > 0002: Introduce CheckIdentifierString(), which is a faster version of
> > SplitIdentifierString() that only validates, and can be used in
> > check_search_path().
> 
> This also looks straightforward.  And I'd make the same comment as
> above for SplitIdentifierString() and CheckIdentifierString().  Perhaps
> we could have folks set SplitIdentifierString's namelist argument to
> NULL to indicate that it only needs to validate.

SplitIdentifierString() modifies its input, and it doesn't seem like a
net win in readability if the API sometimes modifies its input
(requiring a copy in the caller) and sometimes does not. I'm open to
suggestion about a refactor here, but I didn't see a clean way to do
so.
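
For concreteness, the validate-only loop is along these lines (a
simplified sketch of the idea, not the exact patch; the real
SplitIdentifierString() also downcases and truncates names, which a
pure checker can skip):

    /* Return true if 'str' is a syntactically valid identifier list. */
    static bool
    CheckIdentifierStringSketch(const char *str)
    {
        const char *p = str;

        while (*p == ' ' || *p == '\t')
            p++;
        if (*p == '\0')
            return true;                /* empty list is valid */

        for (;;)
        {
            if (*p == '"')
            {
                p++;                    /* skip opening quote */
                for (;;)
                {
                    if (*p == '\0')
                        return false;   /* unterminated quote */
                    if (*p == '"')
                    {
                        p++;
                        if (*p != '"')
                            break;      /* closing quote */
                        /* embedded "" escape: keep scanning */
                    }
                    p++;
                }
            }
            else
            {
                const char *start = p;

                while (*p != '\0' && *p != ',' && *p != ' ' && *p != '\t')
                    p++;
                if (p == start)
                    return false;       /* empty name */
            }

            while (*p == ' ' || *p == '\t')
                p++;
            if (*p == '\0')
                return true;
            if (*p != ',')
                return false;           /* junk after a name */
            p++;                        /* skip separator */
            while (*p == ' ' || *p == '\t')
                p++;
        }
    }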

> 
> > One behavior change in 0003 is that retrieving a cached OID list
> > doesn't call InvokeNamespaceSearchHook(). It would be easy enough to
> > disable caching when a hook exists, but I didn't see a reason to
> > expect that "SET search_path" must invoke that hook each time.
> > Invoking it when computing for the first time, or after a real
> > invalidation, seemed fine to me. Feedback on that is welcome.
> 
> Could a previously cached list be used to circumvent a hook that was
> just reconfigured to ERROR for certain namespaces?  If something like
> that is possible, then we might need to disable the cache when there
> is a hook.

I changed it to move the hook so that it's called after retrieving from
the cache. Most of the speedup is still there with no behavior change.
It also means I had to move the deduplication to happen after
retrieving from the cache, which doesn't seem great but in practice the
search path list is not very long so I don't think it will make much
difference. (Arguably, we can do the deduplication before caching, but
I didn't see a big difference so I didn't want to split hairs on the
semantics.)
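
In rough pseudocode, the reordering looks like this (spcache_lookup()
and spcache_insert() are hypothetical names; the list helpers and
InvokeNamespaceSearchHook() are the usual PostgreSQL ones):

    /* fetch the cached OID list, or compute and cache it */
    oidlist = spcache_lookup(searchpath, roleid);
    if (oidlist == NIL)
        oidlist = spcache_insert(searchpath, roleid);

    /* hook and dedup now run on every retrieval, so a cached entry
     * cannot bypass a hook that was reconfigured in the meantime */
    finalPath = NIL;
    foreach(lc, oidlist)
    {
        Oid     nsid = lfirst_oid(lc);

        if (!list_member_oid(finalPath, nsid) &&
            InvokeNamespaceSearchHook(nsid, false))
            finalPath = lappend_oid(finalPath, nsid);
    }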

New timings:

  baseline:  4.2s
  test query:
    without patch:   13.1s
    0001:            11.7s
    0001+0002:       10.5s
    0001+0002+0003:   8.1s

New patches attached.

Regards,
Jeff Davis

From f0b826ffec2b21ea9eb595771ebc83d271f1e3a1 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 27 Jul 2023 15:11:42 -0700
Subject: [PATCH v2 1/3] Transform proconfig for faster execution.

Store function config settings as a list of (name, value) pairs to
avoid the need to parse for each function invocation.
---
 src/backend/utils/fmgr/fmgr.c | 27 ---
 src/backend/utils/misc/guc.c  | 49 ---
 src/include/utils/guc.h   |  1 +
 3 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe0..3b49684991 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -612,7 +612,7 @@ struct fmgr_security_definer_cache
 {
 	FmgrInfo	flinfo;			/* lookup info for target function */
 	Oid			userid;			/* userid to set, or InvalidOid */
-	ArrayType  *proconfig;		/* GUC values to set, or NULL */
+	List	   *configList;		/* GUC values to set, or NULL */
 	Datum		arg;			/* passthrough argument for plugin modules */
 };
 
@@ -634,6 +634,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 	FmgrInfo   *save_flinfo;
 	Oid			save_userid;
 	int			save_sec_context;
+	ListCell   *lc;
 	volatile int save_nestlevel;
 	PgStat_FunctionCallUsage fcusage;
 
@@ -666,8 +667,10 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 &isnull);
 		if (!isnull)
 		{
+			ArrayType *array;
 			oldcxt = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
-			fcache->proconfig = DatumGetArrayTypePCopy(datum);
+			array = DatumGetArrayTypeP(datum);
+			fcache->configList = TransformGUCArray(array);
 			MemoryContextSwitchTo(oldcxt);
 		}
 
@@ -680,7 +683,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 
 	/* GetUserIdAndSecContext is cheap enough that no harm in a wasted call */
 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
-	if (fcache->proconfig)		/* Need a new GUC nesting level */
+	if (fcache->configList != NIL)		/* Need a new GUC nesting level */
 		save_nestlevel = NewGUCNestLevel();
 	else
 		save_nestlevel = 0;		/* keep compiler quiet */
@@ -689,12 +692,18 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 		SetUserIdAndSecContext(fcache->userid,
 			   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);
 
-	if (fcache->proconfig)
+	foreach(lc, fcache->configList)
 	{
-		Proce

Re: Faster "SET search_path"

2023-07-31 Thread Jeff Davis
On Sat, 2023-07-29 at 12:44 -0400, Isaac Morland wrote:
> Essentially, "just" observe efficiently (somehow) that no change is
> needed, and skip changing it?

I gave this a try and it speeds things up some more.

There might be a surprise factor with an optimization like that,
though. If someone becomes accustomed to the function running fast,
then changing the search_path in the caller could slow things down a
lot, and it would be hard for the user to understand what happened.

Also, there are a few implementation complexities, because I think we
still need to trigger the same invalidation that happens in
assign_search_path().
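
A minimal sketch of the guard, assuming the comparison is done on the
raw GUC strings ("fn_search_path" is a placeholder, and the
invalidation issue just mentioned is exactly what this leaves out):

    /* hypothetical fast path: skip the GUC machinery entirely when the
     * caller's search_path already matches the function's setting */
    const char *current = GetConfigOption("search_path", false, false);

    if (current != NULL && strcmp(current, fn_search_path) == 0)
        return;     /* no change needed -- but this skips the
                     * invalidation done by assign_search_path() */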

Regards,
Jeff Davis





Re: Faster "SET search_path"

2023-07-31 Thread Robert Haas
On Sat, Jul 29, 2023 at 11:59 AM Jeff Davis  wrote:
> Unfortunately, adding a "SET search_path" clause to functions slows
> them down. The attached patches close the performance gap
> substantially.

I haven't reviewed the code but I like the concept a lot. This is badly needed.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Faster "SET search_path"

2023-07-29 Thread Nathan Bossart
On Sat, Jul 29, 2023 at 08:59:01AM -0700, Jeff Davis wrote:
> 0001: Transform the settings in proconfig into a List for faster
> processing. This is simple and speeds up any proconfig setting.

This looks straightforward.  It might be worth combining the common parts
of ProcessGUCArray() and TransformGUCArray() if there is a clean way to do
so.

> 0002: Introduce CheckIdentifierString(), which is a faster version of
> SplitIdentifierString() that only validates, and can be used in
> check_search_path().

This also looks straightforward.  And I'd make the same comment as above
for SplitIdentifierString() and CheckIdentifierString().  Perhaps we could
have folks set SplitIdentifierString's namelist argument to NULL to
indicate that it only needs to validate.

> 0003: Cache of previous search_path settings. The key is the raw
> namespace_search_path string and the role OID, and it caches the
> computed OID list. Changes to the search_path setting or the role can
> retrieve the cached OID list as long as nothing else invalidates the
> cache (changes to the temp schema or a syscache invalidation of
> pg_namespace or pg_role).

At a glance, this looks pretty reasonable, too.

> One behavior change in 0003 is that retrieving a cached OID list
> doesn't call InvokeNamespaceSearchHook(). It would be easy enough to
> disable caching when a hook exists, but I didn't see a reason to expect
> that "SET search_path" must invoke that hook each time. Invoking it
> when computing for the first time, or after a real invalidation, seemed
> fine to me. Feedback on that is welcome.

Could a previously cached list be used to circumvent a hook that was just
reconfigured to ERROR for certain namespaces?  If something like that is
possible, then we might need to disable the cache when there is a hook.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Faster "SET search_path"

2023-07-29 Thread Isaac Morland
On Sat, 29 Jul 2023 at 11:59, Jeff Davis  wrote:

> Unfortunately, adding a "SET search_path" clause to functions slows
> them down. The attached patches close the performance gap
> substantially.
>
> Changes:
>
> 0001: Transform the settings in proconfig into a List for faster
> processing. This is simple and speeds up any proconfig setting.
>
> 0002: Introduce CheckIdentifierString(), which is a faster version of
> SplitIdentifierString() that only validates, and can be used in
> check_search_path().
>
> 0003: Cache of previous search_path settings. The key is the raw
> namespace_search_path string and the role OID, and it caches the
> computed OID list. Changes to the search_path setting or the role can
> retrieve the cached OID list as long as nothing else invalidates the
> cache (changes to the temp schema or a syscache invalidation of
> pg_namespace or pg_role).
>

I'm glad to see this work. Something related to consider, though I'm
not sure if it's helpful: can the case where the caller's search_path
happens to be the same as the SET search_path setting be optimized?
Essentially, "just" observe efficiently (somehow) that no change is
needed, and skip changing it? I ask because substantially all my
functions are written using "SET search_path FROM CURRENT", and many
of them call each other. As a result, in my usage the common case is a
function being called by another function, where both have the same
search_path setting. Ideally, the search_path would not be changed at
all when entering and exiting the callee.


Faster "SET search_path"

2023-07-29 Thread Jeff Davis
Improve performance of "SET search_path".

Motivation:

Creating functions with a "SET search_path" clause is safer and more
secure because the function behavior doesn't change based on the
caller's search_path setting.

Setting search_path in the function declaration is especially important
for SECURITY DEFINER functions[1], but even SECURITY INVOKER functions
can be executed more like SECURITY DEFINER in some contexts (e.g.
REINDEX executing an index function). Also, it's just error-prone to
depend on the caller's search_path unless there's a specific reason you
want to do that.

Unfortunately, adding a "SET search_path" clause to functions slows
them down. The attached patches close the performance gap
substantially.

Changes:

0001: Transform the settings in proconfig into a List for faster
processing. This is simple and speeds up any proconfig setting.

0002: Introduce CheckIdentifierString(), which is a faster version of
SplitIdentifierString() that only validates, and can be used in
check_search_path().

0003: Cache of previous search_path settings. The key is the raw
namespace_search_path string and the role OID, and it caches the
computed OID list. Changes to the search_path setting or the role can
retrieve the cached OID list as long as nothing else invalidates the
cache (changes to the temp schema or a syscache invalidation of
pg_namespace or pg_role).
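
Concretely, a cache entry along these lines might look something like
the following (a sketch; the field and type names are illustrative,
not taken from the patch):

    typedef struct SearchPathCacheKey
    {
        const char *searchPath;     /* raw namespace_search_path string */
        Oid         roleid;         /* role the path was resolved for */
    } SearchPathCacheKey;

    typedef struct SearchPathCacheEntry
    {
        SearchPathCacheKey key;
        List       *oidlist;        /* computed schema OID list */
    } SearchPathCacheEntry;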

One behavior change in 0003 is that retrieving a cached OID list
doesn't call InvokeNamespaceSearchHook(). It would be easy enough to
disable caching when a hook exists, but I didn't see a reason to expect
that "SET search_path" must invoke that hook each time. Invoking it
when computing for the first time, or after a real invalidation, seemed
fine to me. Feedback on that is welcome.

Test:

  CREATE SCHEMA a;
  CREATE SCHEMA b;
  CREATE TABLE big(i) AS SELECT generate_series(1,2000);
  VACUUM big; CHECKPOINT;
  CREATE FUNCTION inc(int) RETURNS INT
LANGUAGE plpgsql
AS $$ begin return $1+1; end; $$;
  CREATE FUNCTION inc_ab(int) RETURNS INT
LANGUAGE plpgsql SET search_path = a, b
AS $$ begin return $1+1; end; $$;

  -- baseline
  EXPLAIN ANALYZE SELECT inc(i) FROM big;

  -- test query
  EXPLAIN ANALYZE SELECT inc_ab(i) FROM big;

Results:

  baseline:  4.3s
  test query:
    without patch:   14.7s
    0001:            13.6s
    0001,0002:       10.4s
    0001,0002,0003:   8.6s

Timings were inconsistent for me, so I took the median of three runs.

It's a lot faster than without the patch. It's still 2X worse than not
specifying any search_path (baseline), but I think it brings it into
"usable" territory for more use cases.

Regards,
Jeff Davis

[1]
https://www.postgresql.org/docs/current/sql-createfunction.html#SQL-CREATEFUNCTION-SECURITY
From c05ee846d6d9099642d58933924a00d80262 Mon Sep 17 00:00:00 2001
From: Jeff Davis 
Date: Thu, 27 Jul 2023 15:11:42 -0700
Subject: [PATCH v1 1/3] Transform proconfig for faster execution.

Store function config settings as a list of (name, value) pairs to
avoid the need to parse for each function invocation.
---
 src/backend/utils/fmgr/fmgr.c | 27 ++--
 src/backend/utils/misc/guc.c  | 58 +++
 src/include/utils/guc.h   |  1 +
 3 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe0..3b49684991 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -612,7 +612,7 @@ struct fmgr_security_definer_cache
 {
 	FmgrInfo	flinfo;			/* lookup info for target function */
 	Oid			userid;			/* userid to set, or InvalidOid */
-	ArrayType  *proconfig;		/* GUC values to set, or NULL */
+	List	   *configList;		/* GUC values to set, or NULL */
 	Datum		arg;			/* passthrough argument for plugin modules */
 };
 
@@ -634,6 +634,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 	FmgrInfo   *save_flinfo;
 	Oid			save_userid;
 	int			save_sec_context;
+	ListCell   *lc;
 	volatile int save_nestlevel;
 	PgStat_FunctionCallUsage fcusage;
 
@@ -666,8 +667,10 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 &isnull);
 		if (!isnull)
 		{
+			ArrayType *array;
 			oldcxt = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
-			fcache->proconfig = DatumGetArrayTypePCopy(datum);
+			array = DatumGetArrayTypeP(datum);
+			fcache->configList = TransformGUCArray(array);
 			MemoryContextSwitchTo(oldcxt);
 		}
 
@@ -680,7 +683,7 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 
 	/* GetUserIdAndSecContext is cheap enough that no harm in a wasted call */
 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
-	if (fcache->proconfig)		/* Need a new GUC nesting level */
+	if (fcache->configList != NIL)		/* Need a new GUC nesting level */
 		save_nestlevel = NewGUCNestLevel();
 	else
 		save_nestlevel = 0;		/* keep compiler quiet */
@@ -689,12 +692,18 @@ fmgr_security_definer(PG_FUNCTION_ARGS)
 		SetUserIdAndSecContext(fcache->userid,
 			   save_sec_context |