From 0d6171e0ad039af624be31f44eb60c3788e275b7 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@timescale.com>
Date: Tue, 5 Mar 2024 14:13:40 +0300
Subject: [PATCH v4] Improve performance of type cache cleanup

This patch significantly improves performance in cases when a user creates
several thousand temporary tables.

Firstly, the patch adds a local map between a relation and its type as it was
previously suggested in the comments for TypeCacheRelCallback(). Unfortunately,
syscache can't be used here because TypeCacheRelCallback() can be called
outside of a transaction.

Secondly, it modifies TypeCacheTypCallback() so that it finds an entry in the
hash table in O(1) time. A linear scan was used previously because the hash
differed from the one used by syscache. Additionally dynahash had no interface
for finding entries with a given hash value.

Author: Teodor Sigaev
Reviewed-by: Aleksander Alekseev
Discussion: https://postgr.es/m/5812a6e5-68ae-4d84-9d85-b443176966a1@sigaev.ru
---
 src/backend/utils/cache/typcache.c | 211 +++++++++++++++++++++--------
 src/backend/utils/hash/dynahash.c  | 106 +++++++++------
 src/include/utils/hsearch.h        |   4 +
 3 files changed, 221 insertions(+), 100 deletions(-)

diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 0d4d0b0a15..9145088f44 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -77,6 +77,15 @@
 /* The main type cache hashtable searched by lookup_type_cache */
 static HTAB *TypeCacheHash = NULL;
 
+/* The map from relation's oid to its type oid */
+typedef struct mapRelTypeEntry
+{
+	Oid	typrelid;
+	Oid type_id;
+} mapRelTypeEntry;
+
+static HTAB *mapRelType = NULL;
+
 /* List of type cache entries for domain types */
 static TypeCacheEntry *firstDomainTypeEntry = NULL;
 
@@ -330,6 +339,15 @@ static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc);
 static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc,
 								   uint32 typmod);
 
+/*
+ * Hash function compatible with one-arg system cache hash function.
+ */
+static uint32
+type_cache_syshash(const void *key, Size keysize)
+{
+	Assert(keysize == sizeof(Oid));
+	return GetSysCacheHashValue1(TYPEOID, ObjectIdGetDatum(*(const Oid*)key));
+}
 
 /*
  * lookup_type_cache
@@ -355,8 +373,20 @@ lookup_type_cache(Oid type_id, int flags)
 
 		ctl.keysize = sizeof(Oid);
 		ctl.entrysize = sizeof(TypeCacheEntry);
+		/*
+		 * TypeCacheEntry takes its hash value from the system cache. For
+		 * TypeCacheHash we use the same hash in order to speed up search by
+		 * hash value. This is used by hash_seq_init_with_hash_value().
+		 */
+		ctl.hash = type_cache_syshash;
+
 		TypeCacheHash = hash_create("Type information cache", 64,
-									&ctl, HASH_ELEM | HASH_BLOBS);
+									&ctl, HASH_ELEM | HASH_FUNCTION);
+
+		ctl.keysize = sizeof(Oid);
+		ctl.entrysize = sizeof(mapRelTypeEntry);
+		mapRelType = hash_create("Map reloid to typeoid", 64,
+								 &ctl, HASH_ELEM | HASH_BLOBS);
 
 		/* Also set up callbacks for SI invalidations */
 		CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0);
@@ -407,8 +437,7 @@ lookup_type_cache(Oid type_id, int flags)
 
 		/* These fields can never change, by definition */
 		typentry->type_id = type_id;
-		typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID,
-													   ObjectIdGetDatum(type_id));
+		typentry->type_id_hash = get_hash_value(TypeCacheHash, &type_id);
 
 		/* Keep this part in sync with the code below */
 		typentry->typlen = typtup->typlen;
@@ -470,6 +499,24 @@ lookup_type_cache(Oid type_id, int flags)
 		ReleaseSysCache(tp);
 	}
 
+	/*
+	 * Add a record to the relation->type map. We don't bother removing it if
+	 * the type later becomes disconnected from the relation. Although that
+	 * seems impossible, a stale entry is safe in any case: in the worst case
+	 * scenario we will just do an extra cleanup of a cache entry.
+	 */
+	if (OidIsValid(typentry->typrelid) && typentry->typtype == TYPTYPE_COMPOSITE)
+	{
+		mapRelTypeEntry *relentry;
+
+		relentry = (mapRelTypeEntry*) hash_search(mapRelType,
+												  &typentry->typrelid,
+												  HASH_ENTER, NULL);
+
+		relentry->typrelid = typentry->typrelid;
+		relentry->type_id = typentry->type_id;
+	}
+
 	/*
 	 * Look up opclasses if we haven't already and any dependent info is
 	 * requested.
@@ -2264,6 +2311,33 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
 	CurrentSession->shared_typmod_table = typmod_table;
 }
 
+static void
+invalidateCompositeTypeCacheEntry(TypeCacheEntry *typentry)
+{
+	/* Delete tupdesc if we have it */
+	if (typentry->tupDesc != NULL)
+	{
+		/*
+		 * Release our refcount and free the tupdesc if none remain. We can't
+		 * use DecrTupleDescRefCount here because this reference is not logged
+		 * by the current resource owner.
+		 */
+		Assert(typentry->tupDesc->tdrefcount > 0);
+		if (--typentry->tupDesc->tdrefcount == 0)
+			FreeTupleDesc(typentry->tupDesc);
+		typentry->tupDesc = NULL;
+
+		/*
+		 * Also clear tupDesc_identifier, so that anyone watching
+		 * it will realize that the tupdesc has changed.
+		 */
+		typentry->tupDesc_identifier = 0;
+	}
+
+	/* Reset equality/comparison/hashing validity information */
+	typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+}
+
 /*
  * TypeCacheRelCallback
  *		Relcache inval callback function
@@ -2273,63 +2347,46 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
  * whatever info we have cached about the composite type's comparability.
  *
  * This is called when a relcache invalidation event occurs for the given
- * relid.  We must scan the whole typcache hash since we don't know the
- * type OID corresponding to the relid.  We could do a direct search if this
- * were a syscache-flush callback on pg_type, but then we would need all
- * ALTER-TABLE-like commands that could modify a rowtype to issue syscache
- * invals against the rel's pg_type OID.  The extra SI signaling could very
- * well cost more than we'd save, since in most usages there are not very
- * many entries in a backend's typcache.  The risk of bugs-of-omission seems
- * high, too.
- *
- * Another possibility, with only localized impact, is to maintain a second
- * hashtable that indexes composite-type typcache entries by their typrelid.
- * But it's still not clear it's worth the trouble.
+ * relid.  We can't use syscache to find the type corresponding to the given
+ * relation because this code can be called outside of a transaction. Thus we
+ * use a dedicated relid->type map, mapRelType.
  */
 static void
 TypeCacheRelCallback(Datum arg, Oid relid)
 {
-	HASH_SEQ_STATUS status;
 	TypeCacheEntry *typentry;
 
-	/* TypeCacheHash must exist, else this callback wouldn't be registered */
-	hash_seq_init(&status, TypeCacheHash);
-	while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+	/*
+	 * mapRelType and TypeCacheHash should exist, otherwise this callback
+	 * wouldn't be registered
+	 */
+
+	if (OidIsValid(relid))
 	{
-		if (typentry->typtype == TYPTYPE_COMPOSITE)
+		mapRelTypeEntry *relentry;
+
+		relentry = (mapRelTypeEntry *) hash_search(mapRelType,
+												  &relid,
+												  HASH_FIND, NULL);
+
+		if (relentry != NULL)
 		{
-			/* Skip if no match, unless we're zapping all composite types */
-			if (relid != typentry->typrelid && relid != InvalidOid)
-				continue;
+			typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
+													  &relentry->type_id,
+													  HASH_FIND, NULL);
 
-			/* Delete tupdesc if we have it */
-			if (typentry->tupDesc != NULL)
+			if (typentry != NULL)
 			{
-				/*
-				 * Release our refcount, and free the tupdesc if none remain.
-				 * (Can't use DecrTupleDescRefCount because this reference is
-				 * not logged in current resource owner.)
-				 */
-				Assert(typentry->tupDesc->tdrefcount > 0);
-				if (--typentry->tupDesc->tdrefcount == 0)
-					FreeTupleDesc(typentry->tupDesc);
-				typentry->tupDesc = NULL;
+				Assert(typentry->typtype == TYPTYPE_COMPOSITE);
+				Assert(relid == typentry->typrelid);
 
-				/*
-				 * Also clear tupDesc_identifier, so that anything watching
-				 * that will realize that the tupdesc has possibly changed.
-				 * (Alternatively, we could specify that to detect possible
-				 * tupdesc change, one must check for tupDesc != NULL as well
-				 * as tupDesc_identifier being the same as what was previously
-				 * seen.  That seems error-prone.)
-				 */
-				typentry->tupDesc_identifier = 0;
+				invalidateCompositeTypeCacheEntry(typentry);
 			}
-
-			/* Reset equality/comparison/hashing validity information */
-			typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
 		}
-		else if (typentry->typtype == TYPTYPE_DOMAIN)
+
+		for (typentry = firstDomainTypeEntry;
+			 typentry != NULL;
+			 typentry = typentry->nextDomain)
 		{
 			/*
 			 * If it's domain over composite, reset flags.  (We don't bother
@@ -2341,6 +2398,35 @@ TypeCacheRelCallback(Datum arg, Oid relid)
 				typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
 		}
 	}
+	else
+	{
+		HASH_SEQ_STATUS status;
+
+		/*
+		 * Relid is invalid, so we need to reset all composite types in the
+		 * cache. We also need to reset flags for domain types; since we are
+		 * scanning the whole hash table anyway, do both in a single scan.
+		 */
+		hash_seq_init(&status, TypeCacheHash);
+		while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+		{
+			if (typentry->typtype == TYPTYPE_COMPOSITE)
+			{
+				invalidateCompositeTypeCacheEntry(typentry);
+			}
+			else if (typentry->typtype == TYPTYPE_DOMAIN)
+			{
+				/*
+				 * If it's domain over composite, reset flags.  (We don't bother
+				 * trying to determine whether the specific base type needs a
+				 * reset.)  Note that if we haven't determined whether the base
+				 * type is composite, we don't need to reset anything.
+				 */
+				if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
+					typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+			}
+		}
+	}
 }
 
 /*
@@ -2358,20 +2444,27 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
 	TypeCacheEntry *typentry;
 
 	/* TypeCacheHash must exist, else this callback wouldn't be registered */
-	hash_seq_init(&status, TypeCacheHash);
+
+	/*
+	 * By convention, a zero hash value is passed to the callback as a sign
+	 * that it's time to invalidate the whole cache. See sinval.c, inval.c
+	 * and InvalidateSystemCachesExtended().
+	 */
+	if (hashvalue == 0)
+		hash_seq_init(&status, TypeCacheHash);
+	else
+		hash_seq_init_with_hash_value(&status, TypeCacheHash, hashvalue);
+
 	while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
 	{
-		/* Is this the targeted type row (or it's a total cache flush)? */
-		if (hashvalue == 0 || typentry->type_id_hash == hashvalue)
-		{
-			/*
-			 * Mark the data obtained directly from pg_type as invalid.  Also,
-			 * if it's a domain, typnotnull might've changed, so we'll need to
-			 * recalculate its constraints.
-			 */
-			typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
-								 TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
-		}
+		Assert(hashvalue == 0 || typentry->type_id_hash == hashvalue);
+		/*
+		 * Mark the data obtained directly from pg_type as invalid.  Also,
+		 * if it's a domain, typnotnull might've changed, so we'll need to
+		 * recalculate its constraints.
+		 */
+		typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
+							 TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
 	}
 }
 
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a4152080b5..3fc8b7ff5e 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -962,6 +962,33 @@ hash_search(HTAB *hashp,
 									   foundPtr);
 }
 
+/*
+ * Do initial lookup of a bucket by the given hash value.
+ */
+static HASHBUCKET*
+hash_initial_lookup(HTAB *hashp, uint32 hashvalue, uint32 *bucket)
+{
+	HASHHDR    *hctl = hashp->hctl;
+	HASHSEGMENT segp;
+	long		segment_num;
+	long		segment_ndx;
+
+	/*
+	 * Do the initial lookup
+	 */
+	*bucket = calc_bucket(hctl, hashvalue);
+
+	segment_num = *bucket >> hashp->sshift;
+	segment_ndx = MOD(*bucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	return &segp[segment_ndx];
+}
+
 void *
 hash_search_with_hash_value(HTAB *hashp,
 							const void *keyPtr,
@@ -973,9 +1000,6 @@ hash_search_with_hash_value(HTAB *hashp,
 	int			freelist_idx = FREELIST_IDX(hctl, hashvalue);
 	Size		keysize;
 	uint32		bucket;
-	long		segment_num;
-	long		segment_ndx;
-	HASHSEGMENT segp;
 	HASHBUCKET	currBucket;
 	HASHBUCKET *prevBucketPtr;
 	HashCompareFunc match;
@@ -1008,17 +1032,7 @@ hash_search_with_hash_value(HTAB *hashp,
 	/*
 	 * Do the initial lookup
 	 */
-	bucket = calc_bucket(hctl, hashvalue);
-
-	segment_num = bucket >> hashp->sshift;
-	segment_ndx = MOD(bucket, hashp->ssize);
-
-	segp = hashp->dir[segment_num];
-
-	if (segp == NULL)
-		hash_corrupted(hashp);
-
-	prevBucketPtr = &segp[segment_ndx];
+	prevBucketPtr = hash_initial_lookup(hashp, hashvalue, &bucket);
 	currBucket = *prevBucketPtr;
 
 	/*
@@ -1159,14 +1173,10 @@ hash_update_hash_key(HTAB *hashp,
 					 const void *newKeyPtr)
 {
 	HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
-	HASHHDR    *hctl = hashp->hctl;
 	uint32		newhashvalue;
 	Size		keysize;
 	uint32		bucket;
 	uint32		newbucket;
-	long		segment_num;
-	long		segment_ndx;
-	HASHSEGMENT segp;
 	HASHBUCKET	currBucket;
 	HASHBUCKET *prevBucketPtr;
 	HASHBUCKET *oldPrevPtr;
@@ -1187,17 +1197,7 @@ hash_update_hash_key(HTAB *hashp,
 	 * this to be able to unlink it from its hash chain, but as a side benefit
 	 * we can verify the validity of the passed existingEntry pointer.
 	 */
-	bucket = calc_bucket(hctl, existingElement->hashvalue);
-
-	segment_num = bucket >> hashp->sshift;
-	segment_ndx = MOD(bucket, hashp->ssize);
-
-	segp = hashp->dir[segment_num];
-
-	if (segp == NULL)
-		hash_corrupted(hashp);
-
-	prevBucketPtr = &segp[segment_ndx];
+	prevBucketPtr = hash_initial_lookup(hashp, existingElement->hashvalue, &bucket);
 	currBucket = *prevBucketPtr;
 
 	while (currBucket != NULL)
@@ -1219,18 +1219,7 @@ hash_update_hash_key(HTAB *hashp,
 	 * chain we want to put the entry into.
 	 */
 	newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
-
-	newbucket = calc_bucket(hctl, newhashvalue);
-
-	segment_num = newbucket >> hashp->sshift;
-	segment_ndx = MOD(newbucket, hashp->ssize);
-
-	segp = hashp->dir[segment_num];
-
-	if (segp == NULL)
-		hash_corrupted(hashp);
-
-	prevBucketPtr = &segp[segment_ndx];
+	prevBucketPtr = hash_initial_lookup(hashp, newhashvalue, &newbucket);
 	currBucket = *prevBucketPtr;
 
 	/*
@@ -1423,10 +1412,27 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
 	status->hashp = hashp;
 	status->curBucket = 0;
 	status->curEntry = NULL;
+	status->hasHashvalue = false;
 	if (!hashp->frozen)
 		register_seq_scan(hashp);
 }
 
+/*
+ * Same as above but scan by the given hash value.
+ * See also hash_seq_search().
+ */
+void
+hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+							  uint32 hashvalue)
+{
+	hash_seq_init(status, hashp);
+
+	status->hasHashvalue = true;
+	status->hashvalue = hashvalue;
+
+	status->curEntry = *hash_initial_lookup(hashp, hashvalue, &status->curBucket);
+}
+
 void *
 hash_seq_search(HASH_SEQ_STATUS *status)
 {
@@ -1440,6 +1446,24 @@ hash_seq_search(HASH_SEQ_STATUS *status)
 	uint32		curBucket;
 	HASHELEMENT *curElem;
 
+	if (status->hasHashvalue)
+	{
+		/*
+		 * Scan entries only in the current bucket because only this bucket can
+		 * contain entries with the given hash value.
+		 */
+		while ((curElem = status->curEntry) != NULL)
+		{
+			status->curEntry = curElem->link;
+			if (status->hashvalue != curElem->hashvalue)
+				continue;
+			return (void *) ELEMENTKEY(curElem);
+		}
+
+		hash_seq_term(status);
+		return NULL;
+	}
+
 	if ((curElem = status->curEntry) != NULL)
 	{
 		/* Continuing scan of curBucket... */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index da26941f6d..c99d74625f 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -122,6 +122,8 @@ typedef struct
 	HTAB	   *hashp;
 	uint32		curBucket;		/* index of current bucket */
 	HASHELEMENT *curEntry;		/* current entry in bucket */
+	bool		hasHashvalue;	/* true if hashvalue was provided */
+	uint32		hashvalue;		/* hashvalue to start seqscan over hash */
 } HASH_SEQ_STATUS;
 
 /*
@@ -141,6 +143,8 @@ extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
 								 const void *newKeyPtr);
 extern long hash_get_num_entries(HTAB *hashp);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
+extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+										  uint32 hashvalue);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
 extern void hash_seq_term(HASH_SEQ_STATUS *status);
 extern void hash_freeze(HTAB *hashp);
-- 
2.44.0

