Hi, Andres

Thanks a lot for the review!

> Why are we keeping that list / the "batch" allocation pattern? It
> doesn't actually seem useful to me after these changes.  Given that we
> don't shrink hash-tables, we could just use the hash-table memory for
> this, no?

I don't think we can do that. According to comments:

```
 * NOTE: once allocated, TabStatusArray structures are never moved or deleted
 [...]
 * This allows relcache pgstat_info pointers to be treated as long-lived data,
 * avoiding repeated searches in pgstat_initstats() when a relation is
 * repeatedly opened during a transaction.
```

It is my understanding that dynahash can't give same guarantees.

> 'faster ... lookup' doesn't strike me as a useful comment, because it's
> only useful in relation of the current code to the new code.

Agree, fixed to 'O(1) ... lookup'.

> It's not really freed...

Right, it's actually zeroed. Fixed.

> Arguably it'd be better to HASH_ENTER directly here, instead of doing
> two lookups.

Good point. Fixed.

> Think it'd be better to move the hash creation into its own function.

Agree, fixed.

On Mon, Mar 06, 2017 at 12:01:50PM -0800, Andres Freund wrote:
> Hi,
> 
> This issue has bothered me in non-partitioned use-cases recently, so
> thanks for taking it up.
> 
> 
> On 2017-03-06 18:22:17 +0300, Aleksander Alekseev wrote:
> > diff --git a/src/backend/postmaster/pgstat.c 
> > b/src/backend/postmaster/pgstat.c
> > index 2fb9a8bf58..fa906e7930 100644
> > --- a/src/backend/postmaster/pgstat.c
> > +++ b/src/backend/postmaster/pgstat.c
> > @@ -160,6 +160,16 @@ typedef struct TabStatusArray
> >  
> >  static TabStatusArray *pgStatTabList = NULL;
> 
> Why are we keeping that list / the "batch" allocation pattern? It
> doesn't actually seem useful to me after these changes.  Given that we
> don't shrink hash-tables, we could just use the hash-table memory for
> this, no?
> 
> I think a separate list that only contains things that are "active" in
> the current transaction makes sense, because the current logic requires
> iterating over a potentially very large array at transaction commit.
> 
> 
> > +/* pgStatTabHash entry */
> > +typedef struct TabStatHashEntry
> > +{
> > +   Oid t_id;
> > +   PgStat_TableStatus* tsa_entry;
> > +} TabStatHashEntry;
> > +
> > +/* Hash table for faster t_id -> tsa_entry lookup */
> > +static HTAB *pgStatTabHash = NULL;
> > +
> 
> 'faster ... lookup' doesn't strike me as a useful comment, because it's
> only useful in relation of the current code to the new code.
> 
> 
> 
> >  /*
> >   * Backends store per-function info that's waiting to be sent to the 
> > collector
> >   * in this hash table (indexed by function OID).
> > @@ -815,7 +825,13 @@ pgstat_report_stat(bool force)
> >                             pgstat_send_tabstat(this_msg);
> >                             this_msg->m_nentries = 0;
> >                     }
> > +
> > +                   /*
> > +                    * Entry will be freed soon so we need to remove it 
> > from the lookup table.
> > +                    */
> > +                   hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE, 
> > NULL);
> >             }
> 
> It's not really freed...
> 
> 
> >  static PgStat_TableStatus *
> >  get_tabstat_entry(Oid rel_id, bool isshared)
> >  {
> > +   TabStatHashEntry* hash_entry;
> >     PgStat_TableStatus *entry;
> >     TabStatusArray *tsa;
> > -   TabStatusArray *prev_tsa;
> > -   int                     i;
> > +
> > +   /* Try to find an entry */
> > +   entry = find_tabstat_entry(rel_id);
> > +   if(entry != NULL)
> > +           return entry;
> 
> Arguably it'd be better to HASH_ENTER directly here, instead of doing
> two lookups.
> 
> 
> >     /*
> > -    * Search the already-used tabstat slots for this relation.
> > +    * Entry doesn't exist - creating one.
> > +    * First make sure there is a free space in a last element of 
> > pgStatTabList.
> >      */
> > -   prev_tsa = NULL;
> > -   for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = 
> > tsa->tsa_next)
> > +   if (!pgStatTabList)
> >     {
> > -           for (i = 0; i < tsa->tsa_used; i++)
> > -           {
> > -                   entry = &tsa->tsa_entries[i];
> > -                   if (entry->t_id == rel_id)
> > -                           return entry;
> > -           }
> > +           HASHCTL     ctl;
> >  
> > -           if (tsa->tsa_used < TABSTAT_QUANTUM)
> > +           Assert(!pgStatTabHash);
> > +
> > +           memset(&ctl, 0, sizeof(ctl));
> > +           ctl.keysize = sizeof(Oid);
> > +           ctl.entrysize = sizeof(TabStatHashEntry);
> > +           ctl.hcxt = TopMemoryContext;
> > +
> > +           pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup 
> > table",
> > +                   TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | 
> > HASH_CONTEXT);
> 
> Think it'd be better to move the hash creation into its own function.
> 
> 
> - Andres

-- 
Best regards,
Aleksander Alekseev
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7cacb1e9b2..08aaf1dac1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -160,6 +160,16 @@ typedef struct TabStatusArray
 
 static TabStatusArray *pgStatTabList = NULL;
 
+/* pgStatTabHash entry */
+typedef struct TabStatHashEntry
+{
+	Oid t_id;
+	PgStat_TableStatus* tsa_entry;
+} TabStatHashEntry;
+
+/* Hash table for O(1) t_id -> tsa_entry lookup */
+static HTAB *pgStatTabHash = NULL;
+
 /*
  * Backends store per-function info that's waiting to be sent to the collector
  * in this hash table (indexed by function OID).
@@ -815,7 +825,13 @@ pgstat_report_stat(bool force)
 				pgstat_send_tabstat(this_msg);
 				this_msg->m_nentries = 0;
 			}
+
+			/*
+			 * Entry will be zeroed soon so we need to remove it from the lookup table.
+			 */
+			hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE, NULL);
 		}
+
 		/* zero out TableStatus structs after use */
 		MemSet(tsa->tsa_entries, 0,
 			   tsa->tsa_used * sizeof(PgStat_TableStatus));
@@ -1667,59 +1683,77 @@ pgstat_initstats(Relation rel)
 }
 
 /*
+ * Make sure pgStatTabList and pgStatTabHash are initialized.
+ */
+static void
+make_sure_stat_tab_initialized()
+{
+	HASHCTL     ctl;
+
+	if (pgStatTabList)
+		return;
+
+	Assert(!pgStatTabHash);
+
+	memset(&ctl, 0, sizeof(ctl));
+	ctl.keysize = sizeof(Oid);
+	ctl.entrysize = sizeof(TabStatHashEntry);
+	ctl.hcxt = TopMemoryContext;
+
+	pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup table",
+		TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	pgStatTabList = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
+												sizeof(TabStatusArray));
+}
+
+/*
  * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
  */
 static PgStat_TableStatus *
 get_tabstat_entry(Oid rel_id, bool isshared)
 {
+	TabStatHashEntry* hash_entry;
 	PgStat_TableStatus *entry;
 	TabStatusArray *tsa;
-	TabStatusArray *prev_tsa;
-	int			i;
+	bool found;
+
+	make_sure_stat_tab_initialized();
+
+	/*
+	 * Find an entry or create a new one.
+	 */
+	hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
+	if(found)
+		return hash_entry->tsa_entry;
 
 	/*
-	 * Search the already-used tabstat slots for this relation.
+	 * `hash_entry` was just created and now we have to fill it.
+	 * First make sure there is a free space in a last element of pgStatTabList.
 	 */
-	prev_tsa = NULL;
-	for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
+	tsa = pgStatTabList;
+	while(tsa->tsa_used == TABSTAT_QUANTUM)
 	{
-		for (i = 0; i < tsa->tsa_used; i++)
+		if(tsa->tsa_next == NULL)
 		{
-			entry = &tsa->tsa_entries[i];
-			if (entry->t_id == rel_id)
-				return entry;
+			tsa->tsa_next = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
+														sizeof(TabStatusArray));
 		}
 
-		if (tsa->tsa_used < TABSTAT_QUANTUM)
-		{
-			/*
-			 * It must not be present, but we found a free slot instead. Fine,
-			 * let's use this one.  We assume the entry was already zeroed,
-			 * either at creation or after last use.
-			 */
-			entry = &tsa->tsa_entries[tsa->tsa_used++];
-			entry->t_id = rel_id;
-			entry->t_shared = isshared;
-			return entry;
-		}
+		tsa = tsa->tsa_next;
 	}
 
 	/*
-	 * We ran out of tabstat slots, so allocate more.  Be sure they're zeroed.
-	 */
-	tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
-													sizeof(TabStatusArray));
-	if (prev_tsa)
-		prev_tsa->tsa_next = tsa;
-	else
-		pgStatTabList = tsa;
-
-	/*
-	 * Use the first entry of the new TabStatusArray.
+	 * Add an entry.
 	 */
 	entry = &tsa->tsa_entries[tsa->tsa_used++];
 	entry->t_id = rel_id;
 	entry->t_shared = isshared;
+
+	/*
+	 * Add a corespinding entry to pgStatTabHash.
+	 */
+	hash_entry->tsa_entry = entry;
 	return entry;
 }
 
@@ -1731,22 +1765,19 @@ get_tabstat_entry(Oid rel_id, bool isshared)
 PgStat_TableStatus *
 find_tabstat_entry(Oid rel_id)
 {
-	PgStat_TableStatus *entry;
-	TabStatusArray *tsa;
-	int			i;
+	TabStatHashEntry* hash_entry;
 
-	for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
-	{
-		for (i = 0; i < tsa->tsa_used; i++)
-		{
-			entry = &tsa->tsa_entries[i];
-			if (entry->t_id == rel_id)
-				return entry;
-		}
-	}
+	/*
+	 * There are no entries at all.
+	 */
+	if(!pgStatTabHash)
+		return NULL;
 
-	/* Not present */
-	return NULL;
+	hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
+	if(!hash_entry)
+		return NULL;
+
+	return hash_entry->tsa_entry;
 }
 
 /*

Attachment: signature.asc
Description: PGP signature

Reply via email to