Hi, Andres Thanks a lot for the review!
> Why are we keeping that list / the "batch" allocation pattern? It > doesn't actually seem useful to me after these changes. Given that we > don't shrink hash-tables, we could just use the hash-table memory for > this, no? I don't think we can do that. According to comments: ``` * NOTE: once allocated, TabStatusArray structures are never moved or deleted [...] * This allows relcache pgstat_info pointers to be treated as long-lived data, * avoiding repeated searches in pgstat_initstats() when a relation is * repeatedly opened during a transaction. ``` It is my understanding that dynahash can't give same guarantees. > 'faster ... lookup' doesn't strike me as a useful comment, because it's > only useful in relation of the current code to the new code. Agree, fixed to 'O(1) ... lookup'. > It's not really freed... Right, it's actually zeroed. Fixed. > Arguably it'd be better to HASH_ENTER directly here, instead of doing > two lookups. Good point. Fixed. > Think it'd be better to move the hash creation into its own function. Agree, fixed. On Mon, Mar 06, 2017 at 12:01:50PM -0800, Andres Freund wrote: > Hi, > > This issue has bothered me in non-partitioned use-cases recently, so > thanks for taking it up. > > > On 2017-03-06 18:22:17 +0300, Aleksander Alekseev wrote: > > diff --git a/src/backend/postmaster/pgstat.c > > b/src/backend/postmaster/pgstat.c > > index 2fb9a8bf58..fa906e7930 100644 > > --- a/src/backend/postmaster/pgstat.c > > +++ b/src/backend/postmaster/pgstat.c > > @@ -160,6 +160,16 @@ typedef struct TabStatusArray > > > > static TabStatusArray *pgStatTabList = NULL; > > Why are we keeping that list / the "batch" allocation pattern? It > doesn't actually seem useful to me after these changes. Given that we > don't shrink hash-tables, we could just use the hash-table memory for > this, no? > > I think a separate list that only contains things that are "active" in > the current transaction makes sense, because the current logic requires > iterating over a potentially very large array at transaction commit. > > > > +/* pgStatTabHash entry */ > > +typedef struct TabStatHashEntry > > +{ > > + Oid t_id; > > + PgStat_TableStatus* tsa_entry; > > +} TabStatHashEntry; > > + > > +/* Hash table for faster t_id -> tsa_entry lookup */ > > +static HTAB *pgStatTabHash = NULL; > > + > > 'faster ... lookup' doesn't strike me as a useful comment, because it's > only useful in relation of the current code to the new code. > > > > > /* > > * Backends store per-function info that's waiting to be sent to the > > collector > > * in this hash table (indexed by function OID). > > @@ -815,7 +825,13 @@ pgstat_report_stat(bool force) > > pgstat_send_tabstat(this_msg); > > this_msg->m_nentries = 0; > > } > > + > > + /* > > + * Entry will be freed soon so we need to remove it > > from the lookup table. > > + */ > > + hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE, > > NULL); > > } > > It's not really freed... > > > > static PgStat_TableStatus * > > get_tabstat_entry(Oid rel_id, bool isshared) > > { > > + TabStatHashEntry* hash_entry; > > PgStat_TableStatus *entry; > > TabStatusArray *tsa; > > - TabStatusArray *prev_tsa; > > - int i; > > + > > + /* Try to find an entry */ > > + entry = find_tabstat_entry(rel_id); > > + if(entry != NULL) > > + return entry; > > Arguably it'd be better to HASH_ENTER directly here, instead of doing > two lookups. > > > > /* > > - * Search the already-used tabstat slots for this relation. > > + * Entry doesn't exist - creating one. > > + * First make sure there is a free space in a last element of > > pgStatTabList. > > */ > > - prev_tsa = NULL; > > - for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = > > tsa->tsa_next) > > + if (!pgStatTabList) > > { > > - for (i = 0; i < tsa->tsa_used; i++) > > - { > > - entry = &tsa->tsa_entries[i]; > > - if (entry->t_id == rel_id) > > - return entry; > > - } > > + HASHCTL ctl; > > > > - if (tsa->tsa_used < TABSTAT_QUANTUM) > > + Assert(!pgStatTabHash); > > + > > + memset(&ctl, 0, sizeof(ctl)); > > + ctl.keysize = sizeof(Oid); > > + ctl.entrysize = sizeof(TabStatHashEntry); > > + ctl.hcxt = TopMemoryContext; > > + > > + pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup > > table", > > + TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | > > HASH_CONTEXT); > > Think it'd be better to move the hash creation into its own function. > > > - Andres -- Best regards, Aleksander Alekseev
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 7cacb1e9b2..08aaf1dac1 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -160,6 +160,16 @@ typedef struct TabStatusArray static TabStatusArray *pgStatTabList = NULL; +/* pgStatTabHash entry */ +typedef struct TabStatHashEntry +{ + Oid t_id; + PgStat_TableStatus* tsa_entry; +} TabStatHashEntry; + +/* Hash table for O(1) t_id -> tsa_entry lookup */ +static HTAB *pgStatTabHash = NULL; + /* * Backends store per-function info that's waiting to be sent to the collector * in this hash table (indexed by function OID). @@ -815,7 +825,13 @@ pgstat_report_stat(bool force) pgstat_send_tabstat(this_msg); this_msg->m_nentries = 0; } + + /* + * Entry will be zeroed soon so we need to remove it from the lookup table. + */ + hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE, NULL); } + /* zero out TableStatus structs after use */ MemSet(tsa->tsa_entries, 0, tsa->tsa_used * sizeof(PgStat_TableStatus)); @@ -1667,59 +1683,77 @@ pgstat_initstats(Relation rel) } /* + * Make sure pgStatTabList and pgStatTabHash are initialized. + */ +static void +make_sure_stat_tab_initialized() +{ + HASHCTL ctl; + + if (pgStatTabList) + return; + + Assert(!pgStatTabHash); + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TabStatHashEntry); + ctl.hcxt = TopMemoryContext; + + pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup table", + TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + pgStatTabList = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, + sizeof(TabStatusArray)); +} + +/* * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel */ static PgStat_TableStatus * get_tabstat_entry(Oid rel_id, bool isshared) { + TabStatHashEntry* hash_entry; PgStat_TableStatus *entry; TabStatusArray *tsa; - TabStatusArray *prev_tsa; - int i; + bool found; + + make_sure_stat_tab_initialized(); + + /* + * Find an entry or create a new one. + */ + hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found); + if(found) + return hash_entry->tsa_entry; /* - * Search the already-used tabstat slots for this relation. + * `hash_entry` was just created and now we have to fill it. + * First make sure there is a free space in a last element of pgStatTabList. */ - prev_tsa = NULL; - for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next) + tsa = pgStatTabList; + while(tsa->tsa_used == TABSTAT_QUANTUM) { - for (i = 0; i < tsa->tsa_used; i++) + if(tsa->tsa_next == NULL) { - entry = &tsa->tsa_entries[i]; - if (entry->t_id == rel_id) - return entry; + tsa->tsa_next = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, + sizeof(TabStatusArray)); } - if (tsa->tsa_used < TABSTAT_QUANTUM) - { - /* - * It must not be present, but we found a free slot instead. Fine, - * let's use this one. We assume the entry was already zeroed, - * either at creation or after last use. - */ - entry = &tsa->tsa_entries[tsa->tsa_used++]; - entry->t_id = rel_id; - entry->t_shared = isshared; - return entry; - } + tsa = tsa->tsa_next; } /* - * We ran out of tabstat slots, so allocate more. Be sure they're zeroed. - */ - tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, - sizeof(TabStatusArray)); - if (prev_tsa) - prev_tsa->tsa_next = tsa; - else - pgStatTabList = tsa; - - /* - * Use the first entry of the new TabStatusArray. + * Add an entry. */ entry = &tsa->tsa_entries[tsa->tsa_used++]; entry->t_id = rel_id; entry->t_shared = isshared; + + /* + * Add a corespinding entry to pgStatTabHash. + */ + hash_entry->tsa_entry = entry; return entry; } @@ -1731,22 +1765,19 @@ get_tabstat_entry(Oid rel_id, bool isshared) PgStat_TableStatus * find_tabstat_entry(Oid rel_id) { - PgStat_TableStatus *entry; - TabStatusArray *tsa; - int i; + TabStatHashEntry* hash_entry; - for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next) - { - for (i = 0; i < tsa->tsa_used; i++) - { - entry = &tsa->tsa_entries[i]; - if (entry->t_id == rel_id) - return entry; - } - } + /* + * There are no entries at all. + */ + if(!pgStatTabHash) + return NULL; - /* Not present */ - return NULL; + hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL); + if(!hash_entry) + return NULL; + + return hash_entry->tsa_entry; } /*
signature.asc
Description: PGP signature