Here's an updated version of this patch that takes care of the issues I
reported previously: no more repalloc() of the requests array; it's now
an slist, which makes the code much more natural IMV. And no more
messing around with doing sprintf to create a separate sprintf pattern
for the per-db stats file; instead have a function to return the name
that uses just the pgstat dir as stored by GUC. I think this can be
further simplified still.
I haven't reviewed the rest yet; please do give this a try to confirm
that the speedups previously reported are still there (i.e. that I
didn't completely blow it).
Thanks
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 38,43 ****
--- 38,44 ----
#include "access/xact.h"
#include "catalog/pg_database.h"
#include "catalog/pg_proc.h"
+ #include "lib/ilist.h"
#include "libpq/ip.h"
#include "libpq/libpq.h"
#include "libpq/pqsignal.h"
***************
*** 66,73 ****
* Paths for the statistics files (relative to installation's $PGDATA).
* ----------
*/
! #define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat"
! #define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp"
/* ----------
* Timer definitions.
--- 67,75 ----
* Paths for the statistics files (relative to installation's $PGDATA).
* ----------
*/
! #define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
! #define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
! #define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
/* ----------
* Timer definitions.
***************
*** 115,120 **** int pgstat_track_activity_query_size = 1024;
--- 117,123 ----
* Built from GUC parameter
* ----------
*/
+ char *pgstat_stat_directory = NULL;
char *pgstat_stat_filename = NULL;
char *pgstat_stat_tmpname = NULL;
***************
*** 219,229 **** static int localNumBackends = 0;
*/
static PgStat_GlobalStats globalStats;
! /* Last time the collector successfully wrote the stats file */
! static TimestampTz last_statwrite;
! /* Latest statistics request time from backends */
! static TimestampTz last_statrequest;
static volatile bool need_exit = false;
static volatile bool got_SIGHUP = false;
--- 222,237 ----
*/
static PgStat_GlobalStats globalStats;
! /* Pending stats-file write request for one database (one list entry per database) */
! typedef struct DBWriteRequest
! {
! Oid databaseid; /* OID of the database to write */
! TimestampTz request_time; /* timestamp of the last write request */
! slist_node next; /* link in the last_statrequests list */
! } DBWriteRequest;
! /* Latest statistics request time from backends for each DB (list of DBWriteRequest) */
! static slist_head last_statrequests = SLIST_STATIC_INIT(last_statrequests);
static volatile bool need_exit = false;
static volatile bool got_SIGHUP = false;
***************
*** 252,262 **** static void pgstat_sighup_handler(SIGNAL_ARGS);
static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
Oid tableoid, bool create);
! static void pgstat_write_statsfile(bool permanent);
! static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
static void backend_read_statsfile(void);
static void pgstat_read_current_status(void);
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static HTAB *pgstat_collect_oids(Oid catalogid);
--- 260,276 ----
static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
Oid tableoid, bool create);
! static void pgstat_write_statsfile(bool permanent, bool allDbs);
! static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent);
! static void pgstat_write_db_dummyfile(Oid databaseid);
! static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs);
! static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
static void backend_read_statsfile(void);
static void pgstat_read_current_status(void);
+ static bool pgstat_write_statsfile_needed(void);
+ static bool pgstat_db_requested(Oid databaseid);
+
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static HTAB *pgstat_collect_oids(Oid catalogid);
***************
*** 285,291 **** static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
-
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
--- 299,304 ----
***************
*** 549,556 **** startup_failed:
void
pgstat_reset_all(void)
{
! unlink(pgstat_stat_filename);
! unlink(PGSTAT_STAT_PERMANENT_FILENAME);
}
#ifdef EXEC_BACKEND
--- 562,605 ----
void
pgstat_reset_all(void)
{
! 	DIR		   *dir;
! 	struct dirent *entry;
!
! 	/*
! 	 * Remove every file found in the temporary stats directory (the one
! 	 * built from the GUC parameter).
! 	 */
! 	dir = AllocateDir(pgstat_stat_directory);
! 	while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL)
! 	{
! 		char	   *fname;
! 		int			totlen;
!
! 		if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
! 			continue;
!
! 		/* directory + '/' + entry name + terminating \0 */
! 		totlen = strlen(pgstat_stat_directory) + strlen(entry->d_name) + 2;
! 		fname = palloc(totlen);
!
! 		snprintf(fname, totlen, "%s/%s", pgstat_stat_directory, entry->d_name);
! 		unlink(fname);
! 		pfree(fname);
! 	}
! 	FreeDir(dir);
!
! 	/*
! 	 * Likewise for the permanent stats directory.  Both the buffer size and
! 	 * the path must be derived from PGSTAT_STAT_PERMANENT_DIRECTORY: sizing
! 	 * the buffer from pgstat_stat_directory, or using the global stats file
! 	 * name as the prefix, yields a truncated or nonexistent path, so nothing
! 	 * would be removed.
! 	 */
! 	dir = AllocateDir(PGSTAT_STAT_PERMANENT_DIRECTORY);
! 	while ((entry = ReadDir(dir, PGSTAT_STAT_PERMANENT_DIRECTORY)) != NULL)
! 	{
! 		char	   *fname;
! 		int			totlen;
!
! 		if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
! 			continue;
!
! 		/* directory + '/' + entry name + terminating \0 */
! 		totlen = strlen(PGSTAT_STAT_PERMANENT_DIRECTORY) + strlen(entry->d_name) + 2;
! 		fname = palloc(totlen);
!
! 		snprintf(fname, totlen, "%s/%s", PGSTAT_STAT_PERMANENT_DIRECTORY, entry->d_name);
! 		unlink(fname);
! 		pfree(fname);
! 	}
! 	FreeDir(dir);
}
#ifdef EXEC_BACKEND
***************
*** 1408,1420 **** pgstat_ping(void)
* ----------
*/
static void
! pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time)
{
PgStat_MsgInquiry msg;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
msg.clock_time = clock_time;
msg.cutoff_time = cutoff_time;
pgstat_send(&msg, sizeof(msg));
}
--- 1457,1470 ----
* ----------
*/
static void
! pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
{
PgStat_MsgInquiry msg;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
msg.clock_time = clock_time;
msg.cutoff_time = cutoff_time;
+ msg.databaseid = databaseid; /* database whose stats file the caller wants written */
pgstat_send(&msg, sizeof(msg));
}
***************
*** 3004,3009 **** PgstatCollectorMain(int argc, char *argv[])
--- 3054,3060 ----
int len;
PgStat_Msg msg;
int wr;
+ bool first_write = true;
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
***************
*** 3053,3069 **** PgstatCollectorMain(int argc, char *argv[])
init_ps_display("stats collector process", "", "", "");
/*
- * Arrange to write the initial status file right away
- */
- last_statrequest = GetCurrentTimestamp();
- last_statwrite = last_statrequest - 1;
-
- /*
* Read in an existing statistics stats file or initialize the stats to
! * zero.
*/
pgStatRunningInCollector = true;
! pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
/*
* Loop to process messages until we get SIGQUIT or detect ungraceful
--- 3104,3114 ----
init_ps_display("stats collector process", "", "", "");
/*
* Read in an existing statistics stats file or initialize the stats to
! * zero (read data for all databases, including table/func stats).
*/
pgStatRunningInCollector = true;
! pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false);
/*
* Loop to process messages until we get SIGQUIT or detect ungraceful
***************
*** 3107,3116 **** PgstatCollectorMain(int argc, char *argv[])
/*
* Write the stats file if a new request has arrived that is not
! * satisfied by existing file.
*/
! if (last_statwrite < last_statrequest)
! pgstat_write_statsfile(false);
/*
* Try to receive and process a message. This will not block,
--- 3152,3165 ----
/*
* Write the stats file if a new request has arrived that is not
! * satisfied by existing file (force writing all files if it's
! * the first write after startup).
*/
! if (first_write || pgstat_write_statsfile_needed())
! {
! pgstat_write_statsfile(false, first_write);
! first_write = false;
! }
/*
* Try to receive and process a message. This will not block,
***************
*** 3269,3275 **** PgstatCollectorMain(int argc, char *argv[])
/*
* Save the final stats to reuse at next startup.
*/
! pgstat_write_statsfile(true);
exit(0);
}
--- 3318,3324 ----
/*
* Save the final stats to reuse at next startup.
*/
! pgstat_write_statsfile(true, true);
exit(0);
}
***************
*** 3349,3354 **** pgstat_get_db_entry(Oid databaseid, bool create)
--- 3398,3404 ----
result->n_block_write_time = 0;
result->stat_reset_timestamp = GetCurrentTimestamp();
+ result->stats_timestamp = 0;
memset(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(Oid);
***************
*** 3429,3451 **** pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
* shutting down only), remove the temporary file so that backends
* starting up under a new postmaster can't read the old data before
* the new collector is ready.
* ----------
*/
static void
! pgstat_write_statsfile(bool permanent)
{
HASH_SEQ_STATUS hstat;
- HASH_SEQ_STATUS tstat;
- HASH_SEQ_STATUS fstat;
PgStat_StatDBEntry *dbentry;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatFuncEntry *funcentry;
FILE *fpout;
int32 format_id;
const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
int rc;
/*
* Open the statistics temp file to write out the current values.
*/
--- 3479,3503 ----
* shutting down only), remove the temporary file so that backends
* starting up under a new postmaster can't read the old data before
* the new collector is ready.
+ *
+ * When 'allDbs' is false, only the requested databases (listed in
+ * last_statrequests) will be written. If 'allDbs' is true, all databases
+ * will be written.
* ----------
*/
static void
! pgstat_write_statsfile(bool permanent, bool allDbs)
{
HASH_SEQ_STATUS hstat;
PgStat_StatDBEntry *dbentry;
FILE *fpout;
int32 format_id;
const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
int rc;
+ elog(DEBUG1, "writing statsfile '%s'", statfile);
+
/*
* Open the statistics temp file to write out the current values.
*/
***************
*** 3484,3489 **** pgstat_write_statsfile(bool permanent)
--- 3536,3555 ----
while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
{
/*
+ * Write out the tables and functions into a separate file, but only
+ * if the database is in the requests or if all DBs are to be written.
+ *
+ * We need to do this before the dbentry write to write the proper
+ * timestamp to the global file.
+ */
+ if (allDbs || pgstat_db_requested(dbentry->databaseid))
+ {
+ elog(DEBUG1, "writing statsfile for DB %d", dbentry->databaseid);
+ dbentry->stats_timestamp = globalStats.stats_timestamp;
+ pgstat_write_db_statsfile(dbentry, permanent);
+ }
+
+ /*
* Write out the DB entry including the number of live backends. We
* don't write the tables or functions pointers, since they're of no
* use to any other process.
***************
*** 3493,3521 **** pgstat_write_statsfile(bool permanent)
(void) rc; /* we'll check for error with ferror */
/*
- * Walk through the database's access stats per table.
- */
- hash_seq_init(&tstat, dbentry->tables);
- while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
- {
- fputc('T', fpout);
- rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
- (void) rc; /* we'll check for error with ferror */
- }
-
- /*
- * Walk through the database's function stats table.
- */
- hash_seq_init(&fstat, dbentry->functions);
- while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
- {
- fputc('F', fpout);
- rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
- (void) rc; /* we'll check for error with ferror */
- }
-
- /*
* Mark the end of this DB
*/
fputc('d', fpout);
}
--- 3559,3568 ----
(void) rc; /* we'll check for error with ferror */
/*
* Mark the end of this DB
+ *
+ * TODO Does using these chars still make sense, when the tables/func
+ * stats are moved to a separate file?
*/
fputc('d', fpout);
}
***************
*** 3527,3532 **** pgstat_write_statsfile(bool permanent)
--- 3574,3607 ----
*/
fputc('E', fpout);
+ /* In any case, we can just throw away all the db requests, but we need to
+ * write dummy files for databases without a stat entry (it would cause
+ * issues in pgstat_read_db_statsfile_timestamp and pgstat wait timeouts).
+ * This may happen e.g. for shared DB (oid = 0) right after initdb.
+ */
+ if (!slist_is_empty(&last_statrequests))
+ {
+ slist_mutable_iter iter;
+
+ slist_foreach_modify(iter, &last_statrequests)
+ {
+ DBWriteRequest *req = slist_container(DBWriteRequest, next,
+ iter.cur);
+
+ /*
+ * Create dummy files for requested databases without a proper
+ * dbentry. It's much easier this way than dealing with multiple
+ * timestamps, possibly existing but not yet written DBs etc.
+ * */
+ if (!pgstat_get_db_entry(req->databaseid, false))
+ pgstat_write_db_dummyfile(req->databaseid);
+
+ pfree(req);
+ }
+
+ slist_init(&last_statrequests);
+ }
+
if (ferror(fpout))
{
ereport(LOG,
***************
*** 3552,3608 **** pgstat_write_statsfile(bool permanent)
tmpfile, statfile)));
unlink(tmpfile);
}
- else
- {
- /*
- * Successful write, so update last_statwrite.
- */
- last_statwrite = globalStats.stats_timestamp;
-
- /*
- * If there is clock skew between backends and the collector, we could
- * receive a stats request time that's in the future. If so, complain
- * and reset last_statrequest. Resetting ensures that no inquiry
- * message can cause more than one stats file write to occur.
- */
- if (last_statrequest > last_statwrite)
- {
- char *reqtime;
- char *mytime;
-
- /* Copy because timestamptz_to_str returns a static buffer */
- reqtime = pstrdup(timestamptz_to_str(last_statrequest));
- mytime = pstrdup(timestamptz_to_str(last_statwrite));
- elog(LOG, "last_statrequest %s is later than collector's time %s",
- reqtime, mytime);
- pfree(reqtime);
- pfree(mytime);
-
- last_statrequest = last_statwrite;
- }
- }
if (permanent)
unlink(pgstat_stat_filename);
}
/* ----------
* pgstat_read_statsfile() -
*
* Reads in an existing statistics collector file and initializes the
* databases' hash table (whose entries point to the tables' hash tables).
* ----------
*/
static HTAB *
! pgstat_read_statsfile(Oid onlydb, bool permanent)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatTabEntry tabbuf;
- PgStat_StatFuncEntry funcbuf;
- PgStat_StatFuncEntry *funcentry;
HASHCTL hash_ctl;
HTAB *dbhash;
HTAB *tabhash = NULL;
--- 3627,3905 ----
tmpfile, statfile)));
unlink(tmpfile);
}
if (permanent)
unlink(pgstat_stat_filename);
}
+ /*
+  * get_dbstat_file_len() -
+  *
+  *	Return the length that a DB stat file name would have (including the
+  *	terminating \0), i.e. the buffer size get_dbstat_filename() needs for
+  *	the same arguments.
+  *
+  *	permanent  - name is below PGSTAT_STAT_PERMANENT_DIRECTORY rather than
+  *				 pgstat_stat_directory
+  *	tempname   - name ends in ".tmp" rather than ".stat"
+  *	databaseid - OID printed in the "db_%u" part of the name
+  *
+  * XXX We could avoid this overhead by caching a maximum length in
+  * assign_pgstat_temp_directory; also the distinctions on "permanent" and
+  * "tempname" seem pointless (what do you mean to save one byte of stack
+  * space!?)
+  */
+ static int
+ get_dbstat_file_len(bool permanent, bool tempname, Oid databaseid)
+ {
+ 	char		tmp[1];
+ 	int			len;
+
+ 	/* don't actually print, but return how many chars would be used */
+ 	len = snprintf(tmp, 1, "%s/db_%u.%s",
+ 				   permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY : pgstat_stat_directory,
+ 				   databaseid,
+ 				   tempname ? "tmp" : "stat");
+
+ 	/*
+ 	 * A negative result means snprintf failed; callers size a stack array
+ 	 * from our return value, so never hand back a non-positive length.
+ 	 */
+ 	/* XXX pointless? */
+ 	if (len < 0 || len >= MAXPGPATH)
+ 		elog(PANIC, "pgstat path too long");
+
+ 	/* count terminating \0 */
+ 	return len + 1;
+ }
+
+ /*
+  * get_dbstat_filename() -
+  *
+  *	Build the name of a per-database stats file.
+  *
+  *	permanent  - use PGSTAT_STAT_PERMANENT_DIRECTORY rather than
+  *				 pgstat_stat_directory
+  *	tempname   - produce the ".tmp" name rather than the ".stat" name
+  *	databaseid - OID printed in the "db_%u" part of the name
+  *	filename   - output buffer
+  *	len        - size of the output buffer in bytes, as returned by
+  *				 get_dbstat_file_len() for the same arguments
+  */
+ static void
+ get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
+ 					char *filename, int len)
+ {
+ #ifdef USE_ASSERT_CHECKING
+ 	int			printed;
+
+ 	printed =
+ #endif
+ 		snprintf(filename, len, "%s/db_%u.%s",
+ 				 permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY : pgstat_stat_directory,
+ 				 databaseid,
+ 				 tempname ? "tmp" : "stat");
+
+ 	/*
+ 	 * snprintf returns the length excluding the terminating \0, so the
+ 	 * output was truncated as soon as printed == len.
+ 	 */
+ 	Assert(printed >= 0 && printed < len);
+ }
+
+ /* ----------
+  * pgstat_write_db_statsfile() -
+  *
+  *	Tell the news.  This writes the stats file for a single database:
+  *	the format ID, the global stats timestamp, one 'T' record per table
+  *	entry, one 'F' record per function entry, and a closing 'E'.
+  *
+  *	dbentry   - entry whose 'tables' and 'functions' hashes are dumped
+  *	permanent - write below the permanent stats directory rather than the
+  *				temporary one
+  *
+  *	The data goes to a ".tmp" file first, which is then renamed over the
+  *	".stat" file.  Failures are reported at LOG level; in that case the
+  *	tmp file is removed and no rename takes place.
+  *
+  *	If writing to the permanent file (happens when the collector is
+  *	shutting down only), remove the temporary file so that backends
+  *	starting up under a new postmaster can't read the old data before
+  *	the new collector is ready.
+  * ----------
+  */
+ static void
+ pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent)
+ {
+ 	HASH_SEQ_STATUS tstat;
+ 	HASH_SEQ_STATUS fstat;
+ 	PgStat_StatTabEntry *tabentry;
+ 	PgStat_StatFuncEntry *funcentry;
+ 	FILE	   *fpout;
+ 	int32		format_id;
+ 	Oid			dbid = dbentry->databaseid;
+ 	int			rc;
+ 	int			tmpfilelen = get_dbstat_file_len(permanent, true, dbid);
+ 	char		tmpfile[tmpfilelen];
+ 	int			statfilelen = get_dbstat_file_len(permanent, false, dbid);
+ 	char		statfile[statfilelen];
+
+ 	get_dbstat_filename(permanent, true, dbid, tmpfile, tmpfilelen);
+ 	get_dbstat_filename(permanent, false, dbid, statfile, statfilelen);
+
+ 	elog(DEBUG1, "writing statsfile '%s'", statfile);
+
+ 	/*
+ 	 * Open the statistics temp file to write out the current values.
+ 	 */
+ 	fpout = AllocateFile(tmpfile, PG_BINARY_W);
+ 	if (fpout == NULL)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not open temporary statistics file \"%s\": %m",
+ 						tmpfile)));
+ 		return;
+ 	}
+
+ 	/*
+ 	 * Write the file header --- currently just a format ID.
+ 	 */
+ 	format_id = PGSTAT_FILE_FORMAT_ID;
+ 	rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+
+ 	/*
+ 	 * Write the timestamp.
+ 	 */
+ 	rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ 	(void) rc;					/* we'll check for error with ferror */
+
+ 	/*
+ 	 * Walk through the database's access stats per table.
+ 	 */
+ 	hash_seq_init(&tstat, dbentry->tables);
+ 	while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
+ 	{
+ 		fputc('T', fpout);
+ 		rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
+ 		(void) rc;				/* we'll check for error with ferror */
+ 	}
+
+ 	/*
+ 	 * Walk through the database's function stats table.
+ 	 */
+ 	hash_seq_init(&fstat, dbentry->functions);
+ 	while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
+ 	{
+ 		fputc('F', fpout);
+ 		rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
+ 		(void) rc;				/* we'll check for error with ferror */
+ 	}
+
+ 	/*
+ 	 * No more output to be done. Close the temp file and replace the old
+ 	 * per-database stats file with it.  The ferror() check replaces testing
+ 	 * for error after each individual fputc or fwrite above.
+ 	 */
+ 	fputc('E', fpout);
+
+ 	if (ferror(fpout))
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not write temporary statistics file \"%s\": %m",
+ 						tmpfile)));
+ 		FreeFile(fpout);
+ 		unlink(tmpfile);
+ 	}
+ 	else if (FreeFile(fpout) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not close temporary statistics file \"%s\": %m",
+ 						tmpfile)));
+ 		unlink(tmpfile);
+ 	}
+ 	else if (rename(tmpfile, statfile) < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
+ 						tmpfile, statfile)));
+ 		unlink(tmpfile);
+ 	}
+
+ 	if (permanent)
+ 	{
+ 		/*
+ 		 * The file to get rid of is this database's stats file in the
+ 		 * temporary directory, not the permanent tmp file handled above
+ 		 * (that one is already renamed or unlinked at this point).
+ 		 */
+ 		int			oldfilelen = get_dbstat_file_len(false, false, dbid);
+ 		char		oldfile[oldfilelen];
+
+ 		get_dbstat_filename(false, false, dbid, oldfile, oldfilelen);
+
+ 		elog(DEBUG1, "removing temporary stat file '%s'", oldfile);
+ 		unlink(oldfile);
+ 	}
+ }
+
+
+ /* ----------
+ * pgstat_write_db_dummyfile() -
+ *
+ * Write a dummy stats file for a database that has no dbentry yet. The
+ * file holds just the header (format ID and the global stats timestamp)
+ * followed by the 'E' end marker. It is always written below
+ * pgstat_stat_directory (permanent = false in the file name calls).
+ * ----------
+ */
+ static void
+ pgstat_write_db_dummyfile(Oid databaseid)
+ {
+ FILE *fpout;
+ int32 format_id;
+ int rc;
+ int tmpfilelen = get_dbstat_file_len(false, true, databaseid);
+ char tmpfile[tmpfilelen];
+ int statfilelen = get_dbstat_file_len(false, false, databaseid);
+ char statfile[statfilelen];
+
+ get_dbstat_filename(false, true, databaseid, tmpfile, tmpfilelen);
+ get_dbstat_filename(false, false, databaseid, statfile, statfilelen);
+
+ elog(DEBUG1, "writing statsfile '%s'", statfile);
+
+ /*
+ * Open the statistics temp file to write out the current values.
+ */
+ fpout = AllocateFile(tmpfile, PG_BINARY_W);
+ if (fpout == NULL)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary statistics file \"%s\": %m",
+ tmpfile)));
+ return;
+ }
+
+ /*
+ * Write the file header --- currently just a format ID.
+ */
+ format_id = PGSTAT_FILE_FORMAT_ID;
+ rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * Write the timestamp.
+ */
+ rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * No more output to be done. Close the temp file and replace the old
+ * per-database stats file with it. The ferror() check replaces testing
+ * for error after each individual fputc or fwrite above.
+ */
+ fputc('E', fpout);
+
+ if (ferror(fpout))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write temporary dummy statistics file \"%s\": %m",
+ tmpfile)));
+ FreeFile(fpout);
+ unlink(tmpfile);
+ }
+ else if (FreeFile(fpout) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not close temporary dummy statistics file \"%s\": %m",
+ tmpfile)));
+ unlink(tmpfile);
+ }
+ else if (rename(tmpfile, statfile) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not rename temporary dummy statistics file \"%s\" to \"%s\": %m",
+ tmpfile, statfile)));
+ unlink(tmpfile);
+ }
+
+ }
/* ----------
* pgstat_read_statsfile() -
*
* Reads in an existing statistics collector file and initializes the
* databases' hash table (whose entries point to the tables' hash tables).
+ *
+ * Allows reading only the global stats (at database level), which is just
+ * enough for many purposes (e.g. autovacuum launcher etc.). If this is
+ * sufficient for you, use onlydbs=true.
* ----------
*/
static HTAB *
! pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
HASHCTL hash_ctl;
HTAB *dbhash;
HTAB *tabhash = NULL;
***************
*** 3613,3618 **** pgstat_read_statsfile(Oid onlydb, bool permanent)
--- 3910,3920 ----
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
/*
+ * If we want a db-level stats only, we don't want a particular db.
+ */
+ Assert(!((onlydb != InvalidOid) && onlydbs));
+
+ /*
* The tables will live in pgStatLocalContext.
*/
pgstat_setup_memcxt();
***************
*** 3758,3763 **** pgstat_read_statsfile(Oid onlydb, bool permanent)
--- 4060,4075 ----
*/
tabhash = dbentry->tables;
funchash = dbentry->functions;
+
+ /*
+ * Read the data from the file for this database. If there was
+ * onlydb specified (!= InvalidOid), we would not get here because
+ * of a break above. So we don't need to recheck.
+ */
+ if (!onlydbs)
+ pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash,
+ permanent);
+
break;
/*
***************
*** 3768,3773 **** pgstat_read_statsfile(Oid onlydb, bool permanent)
--- 4080,4177 ----
funchash = NULL;
break;
+ case 'E':
+ goto done;
+
+ default:
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"",
+ statfile)));
+ goto done;
+ }
+ }
+
+ done:
+ FreeFile(fpin);
+
+ if (permanent)
+ unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+
+ return dbhash;
+ }
+
+
+ /* ----------
+ * pgstat_read_db_statsfile() -
+ *
+ * Reads in an existing statistics collector db file and initializes the
+ * tables and functions hash tables (for the database identified by Oid).
+ * ----------
+ */
+ static void
+ pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent)
+ {
+ PgStat_StatTabEntry *tabentry;
+ PgStat_StatTabEntry tabbuf;
+ PgStat_StatFuncEntry funcbuf;
+ PgStat_StatFuncEntry *funcentry;
+ FILE *fpin;
+ int32 format_id;
+ TimestampTz timestamp;
+ bool found;
+ int statfilelen = get_dbstat_file_len(permanent, false, databaseid);
+ char statfile[statfilelen];
+
+ get_dbstat_filename(permanent, false, databaseid, statfile, statfilelen);
+
+ /*
+ * Try to open the status file. If it doesn't exist, the backends simply
+ * return zero for anything and the collector simply starts from scratch
+ * with empty counters.
+ *
+ * ENOENT is a possibility if the stats collector is not running or has
+ * not yet written the stats file the first time. Any other failure
+ * condition is suspicious.
+ */
+ if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not open statistics file \"%s\": %m",
+ statfile)));
+ return;
+ }
+
+ /*
+ * Verify it's of the expected format.
+ */
+ if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
+ || format_id != PGSTAT_FILE_FORMAT_ID)
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"", statfile)));
+ goto done;
+ }
+
+ /*
+ * Read global stats struct
+ */
+ if (fread(&timestamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp))
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"", statfile)));
+ goto done;
+ }
+
+ /*
+ * We found an existing collector stats file. Read it and put all the
+ * hashtable entries into place.
+ */
+ for (;;)
+ {
+ switch (fgetc(fpin))
+ {
/*
* 'T' A PgStat_StatTabEntry follows.
*/
***************
*** 3854,3878 **** done:
FreeFile(fpin);
if (permanent)
! unlink(PGSTAT_STAT_PERMANENT_FILENAME);
! return dbhash;
}
/* ----------
! * pgstat_read_statsfile_timestamp() -
*
! * Attempt to fetch the timestamp of an existing stats file.
* Returns TRUE if successful (timestamp is stored at *ts).
* ----------
*/
static bool
! pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
{
! PgStat_GlobalStats myGlobalStats;
FILE *fpin;
int32 format_id;
! const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
/*
* Try to open the status file. As above, anything but ENOENT is worthy
--- 4258,4294 ----
FreeFile(fpin);
if (permanent)
! {
! int statfilelen = get_dbstat_file_len(permanent, false, databaseid);
! char statfile[statfilelen];
! get_dbstat_filename(permanent, false, databaseid, statfile, statfilelen);
!
! elog(DEBUG1, "removing permanent stats file '%s'", statfile);
! unlink(statfile);
! }
!
! return;
}
+
/* ----------
! * pgstat_read_db_statsfile_timestamp() -
*
! * Attempt to fetch the timestamp of an existing stats file (for a DB).
* Returns TRUE if successful (timestamp is stored at *ts).
* ----------
*/
static bool
! pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz *ts)
{
! TimestampTz timestamp;
FILE *fpin;
int32 format_id;
! int filenamelen = get_dbstat_file_len(permanent, false, databaseid);
! char statfile[filenamelen];
!
! get_dbstat_filename(permanent, false, databaseid, statfile, filenamelen);
/*
* Try to open the status file. As above, anything but ENOENT is worthy
***************
*** 3903,3909 **** pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
/*
* Read global stats struct
*/
! if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats))
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"", statfile)));
--- 4319,4325 ----
/*
* Read global stats struct
*/
! if (fread(&timestamp, 1, sizeof(TimestampTz), fpin) != sizeof(TimestampTz))
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"", statfile)));
***************
*** 3911,3917 **** pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
return false;
}
! *ts = myGlobalStats.stats_timestamp;
FreeFile(fpin);
return true;
--- 4327,4333 ----
return false;
}
! *ts = timestamp;
FreeFile(fpin);
return true;
***************
*** 3947,3953 **** backend_read_statsfile(void)
CHECK_FOR_INTERRUPTS();
! ok = pgstat_read_statsfile_timestamp(false, &file_ts);
cur_ts = GetCurrentTimestamp();
/* Calculate min acceptable timestamp, if we didn't already */
--- 4363,4369 ----
CHECK_FOR_INTERRUPTS();
! ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, &file_ts);
cur_ts = GetCurrentTimestamp();
/* Calculate min acceptable timestamp, if we didn't already */
***************
*** 4006,4012 **** backend_read_statsfile(void)
pfree(mytime);
}
! pgstat_send_inquiry(cur_ts, min_ts);
break;
}
--- 4422,4428 ----
pfree(mytime);
}
! pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
break;
}
***************
*** 4016,4022 **** backend_read_statsfile(void)
/* Not there or too old, so kick the collector and wait a bit */
if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
! pgstat_send_inquiry(cur_ts, min_ts);
pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
}
--- 4432,4438 ----
/* Not there or too old, so kick the collector and wait a bit */
if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
! pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
}
***************
*** 4026,4034 **** backend_read_statsfile(void)
/* Autovacuum launcher wants stats about all databases */
if (IsAutoVacuumLauncherProcess())
! pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
else
! pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
}
--- 4442,4457 ----
/* Autovacuum launcher wants stats about all databases */
if (IsAutoVacuumLauncherProcess())
! /*
! * FIXME Does it really need info including tables/functions? Or is it enough to read
! * database-level stats? It seems to me the launcher needs PgStat_StatDBEntry only
! * (at least that's how I understand the rebuild_database_list() in autovacuum.c),
! * because pgstat_stattabentries are used in do_autovacuum() only, and that's what's
! * executed in workers ... So maybe we'd be just fine by reading in the dbentries?
! */
! pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true);
else
! pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, false);
}
***************
*** 4084,4109 **** pgstat_clear_snapshot(void)
static void
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
{
/*
! * Advance last_statrequest if this requestor has a newer cutoff time
! * than any previous request.
*/
! if (msg->cutoff_time > last_statrequest)
! last_statrequest = msg->cutoff_time;
/*
! * If the requestor's local clock time is older than last_statwrite, we
* should suspect a clock glitch, ie system time going backwards; though
* the more likely explanation is just delayed message receipt. It is
* worth expending a GetCurrentTimestamp call to be sure, since a large
* retreat in the system clock reading could otherwise cause us to neglect
* to update the stats file for a long time.
*/
! if (msg->clock_time < last_statwrite)
{
TimestampTz cur_ts = GetCurrentTimestamp();
! if (cur_ts < last_statwrite)
{
/*
* Sure enough, time went backwards. Force a new stats file write
--- 4507,4559 ----
static void
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
{
+ slist_iter iter;
+ bool found = false;
+ DBWriteRequest *newreq;
+ PgStat_StatDBEntry *dbentry;
+
+ elog(DEBUG1, "received inquiry for %d", msg->databaseid);
+
+ /*
+ * Find the last write request for this DB (found=true in that case). Plain
+ * linear search, not really worth doing any magic here (probably).
+ */
+ slist_foreach(iter, &last_statrequests)
+ {
+ DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
+
+ if (req->databaseid != msg->databaseid)
+ continue;
+
+ if (msg->cutoff_time > req->request_time)
+ req->request_time = msg->cutoff_time;
+ found = true;
+ return;
+ }
+
/*
! * There's no request for this DB yet, so create one.
*/
! newreq = palloc(sizeof(DBWriteRequest));
!
! newreq->databaseid = msg->databaseid;
! newreq->request_time = msg->clock_time;
! slist_push_head(&last_statrequests, &newreq->next);
/*
! * If the requestor's local clock time is older than stats_timestamp, we
* should suspect a clock glitch, ie system time going backwards; though
* the more likely explanation is just delayed message receipt. It is
* worth expending a GetCurrentTimestamp call to be sure, since a large
* retreat in the system clock reading could otherwise cause us to neglect
* to update the stats file for a long time.
*/
! dbentry = pgstat_get_db_entry(msg->databaseid, false);
! if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp))
{
TimestampTz cur_ts = GetCurrentTimestamp();
! if (cur_ts < dbentry->stats_timestamp)
{
/*
* Sure enough, time went backwards. Force a new stats file write
***************
*** 4113,4127 **** pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
char *mytime;
/* Copy because timestamptz_to_str returns a static buffer */
! writetime = pstrdup(timestamptz_to_str(last_statwrite));
mytime = pstrdup(timestamptz_to_str(cur_ts));
! elog(LOG, "last_statwrite %s is later than collector's time %s",
! writetime, mytime);
pfree(writetime);
pfree(mytime);
! last_statrequest = cur_ts;
! last_statwrite = last_statrequest - 1;
}
}
}
--- 4563,4578 ----
char *mytime;
/* Copy because timestamptz_to_str returns a static buffer */
! writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
mytime = pstrdup(timestamptz_to_str(cur_ts));
! elog(LOG,
! "stats_timestamp %s is later than collector's time %s for db %d",
! writetime, mytime, dbentry->databaseid);
pfree(writetime);
pfree(mytime);
! newreq->request_time = cur_ts;
! dbentry->stats_timestamp = cur_ts - 1;
}
}
}
***************
*** 4270,4298 **** pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
static void
pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
{
PgStat_StatDBEntry *dbentry;
/*
* Lookup the database in the hashtable.
*/
! dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
/*
! * If found, remove it.
*/
if (dbentry)
{
if (dbentry->tables != NULL)
hash_destroy(dbentry->tables);
if (dbentry->functions != NULL)
hash_destroy(dbentry->functions);
if (hash_search(pgStatDBHash,
! (void *) &(dbentry->databaseid),
HASH_REMOVE, NULL) == NULL)
ereport(ERROR,
! (errmsg("database hash table corrupted "
! "during cleanup --- abort")));
}
}
--- 4721,4757 ----
static void
pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
{
+ Oid dbid = msg->m_databaseid;
PgStat_StatDBEntry *dbentry;
/*
* Lookup the database in the hashtable.
*/
! dbentry = pgstat_get_db_entry(dbid, false);
/*
! * If found, remove it (along with the db statfile).
*/
if (dbentry)
{
+ int statfilelen = get_dbstat_file_len(true, false, dbid);
+ char statfile[statfilelen];
+
+ get_dbstat_filename(true, false, dbid, statfile, statfilelen);
+
+ elog(DEBUG1, "removing %s", statfile);
+ unlink(statfile);
+
if (dbentry->tables != NULL)
hash_destroy(dbentry->tables);
if (dbentry->functions != NULL)
hash_destroy(dbentry->functions);
if (hash_search(pgStatDBHash,
! (void *) &dbid,
HASH_REMOVE, NULL) == NULL)
ereport(ERROR,
! (errmsg("database hash table corrupted during cleanup --- abort")));
}
}
***************
*** 4687,4689 **** pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
--- 5146,5206 ----
HASH_REMOVE, NULL);
}
}
+
+ /* ----------
+ * pgstat_write_statsfile_needed() -
+ *
+ * Checks whether any pending db stats request requires a file write.
+ *
+ * TODO Seems that thanks to the way we handle last_statrequests (erase after
+ * a write), this is unnecessary. Just check that there's at least one
+ * request and you're done. Although there might be delayed requests ...
+ * ----------
+ */
+ static bool
+ pgstat_write_statsfile_needed(void)
+ {
+ PgStat_StatDBEntry *dbentry;
+ slist_iter iter;
+
+ /* Check each requested database to see whether its stats need a refresh. */
+ slist_foreach(iter, &last_statrequests)
+ {
+ DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
+
+ dbentry = pgstat_get_db_entry(req->databaseid, false);
+
+ /* No dbentry yet, or its stats_timestamp is older than the request. */
+ if (!dbentry || (dbentry->stats_timestamp < req->request_time))
+ {
+ return true;
+ }
+ }
+
+ /* Every requested database already has sufficiently recent stats. */
+ return false;
+ }
+
+ /* ----------
+ * pgstat_db_requested() -
+ *
+ * Checks whether last_statrequests holds a write request for the given DB.
+ * ----------
+ */
+
+ static bool
+ pgstat_db_requested(Oid databaseid)
+ {
+ slist_iter iter;
+
+ /* Look for a pending write request whose databaseid matches. */
+ slist_foreach(iter, &last_statrequests)
+ {
+ DBWriteRequest *req = slist_container(DBWriteRequest, next, iter.cur);
+
+ if (req->databaseid == databaseid)
+ return true;
+ }
+
+ return false;
+ }
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 8704,8717 **** static void
assign_pgstat_temp_directory(const char *newval, void *extra)
{
/* check_canonical_path already canonicalized newval for us */
char *tname;
char *fname;
! tname = guc_malloc(ERROR, strlen(newval) + 12); /* /pgstat.tmp */
! sprintf(tname, "%s/pgstat.tmp", newval);
! fname = guc_malloc(ERROR, strlen(newval) + 13); /* /pgstat.stat */
! sprintf(fname, "%s/pgstat.stat", newval);
if (pgstat_stat_tmpname)
free(pgstat_stat_tmpname);
pgstat_stat_tmpname = tname;
--- 8704,8726 ----
assign_pgstat_temp_directory(const char *newval, void *extra)
{
/* check_canonical_path already canonicalized newval for us */
+ char *dname;
char *tname;
char *fname;
! /* directory */
! dname = guc_malloc(ERROR, strlen(newval) + 1); /* runtime dir */
! sprintf(dname, "%s", newval);
+ /* global stats */
+ tname = guc_malloc(ERROR, strlen(newval) + 12); /* /global.tmp */
+ sprintf(tname, "%s/global.tmp", newval);
+ fname = guc_malloc(ERROR, strlen(newval) + 13); /* /global.stat */
+ sprintf(fname, "%s/global.stat", newval);
+
+ if (pgstat_stat_directory)
+ free(pgstat_stat_directory);
+ pgstat_stat_directory = dname;
if (pgstat_stat_tmpname)
free(pgstat_stat_tmpname);
pgstat_stat_tmpname = tname;
*** a/src/bin/initdb/initdb.c
--- b/src/bin/initdb/initdb.c
***************
*** 192,197 **** const char *subdirs[] = {
--- 192,198 ----
"base",
"base/1",
"pg_tblspc",
+ "pg_stat",
"pg_stat_tmp"
};
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 205,210 **** typedef struct PgStat_MsgInquiry
--- 205,211 ----
PgStat_MsgHdr m_hdr;
TimestampTz clock_time; /* observed local clock time */
TimestampTz cutoff_time; /* minimum acceptable file timestamp */
+ Oid databaseid; /* requested DB (InvalidOid => all DBs) */
} PgStat_MsgInquiry;
***************
*** 514,520 **** typedef union PgStat_Msg
* ------------------------------------------------------------
*/
! #define PGSTAT_FILE_FORMAT_ID 0x01A5BC9A
/* ----------
* PgStat_StatDBEntry The collector's data per database
--- 515,521 ----
* ------------------------------------------------------------
*/
! #define PGSTAT_FILE_FORMAT_ID 0xA240CA47
/* ----------
* PgStat_StatDBEntry The collector's data per database
***************
*** 545,550 **** typedef struct PgStat_StatDBEntry
--- 546,552 ----
PgStat_Counter n_block_write_time;
TimestampTz stat_reset_timestamp;
+ TimestampTz stats_timestamp; /* time of db stats file update */
/*
* tables and functions must be last in the struct, because we don't write
***************
*** 722,727 **** extern bool pgstat_track_activities;
--- 724,730 ----
extern bool pgstat_track_counts;
extern int pgstat_track_functions;
extern PGDLLIMPORT int pgstat_track_activity_query_size;
+ extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers