On 3.2.2013 21:54, Jeff Janes wrote:
> On Sat, Feb 2, 2013 at 2:33 PM, Jeff Janes <[email protected]> wrote:
>> On Sat, Jan 5, 2013 at 8:03 PM, Tomas Vondra <[email protected]> wrote:
>>> On 3.1.2013 20:33, Magnus Hagander wrote:
>>>>
>>>> Yeah, +1 for a separate directory not in global.
>>>
>>> OK, I moved the files from "global/stat" to "stat".
>>
>> This has a warning:
>>
>> pgstat.c:5132: warning: 'pgstat_write_statsfile_needed' was used with
>> no prototype before its definition
>>
>> I plan to do some performance testing, but that will take a while so I
>> wanted to post this before I get distracted.
>
> Running "vacuumdb -a" on a cluster with 1000 db with 200 tables (x
> serial primary key) in each, I get log messages like this:
>
> last_statwrite 23682-06-18 22:36:52.960194-07 is later than
> collector's time 2013-02-03 12:49:19.700629-08 for db 16387
>
> Note the bizarre year in the first time stamp.
>
> If it matters, I got this after shutting down the cluster, blowing
> away $DATA/stat/*, then restarting it and invoking vacuumdb.
I somehow expected that hash_search zeroes all the fields of a new
entry, but looking at pgstat_get_db_entry that obviously is not the
case. So stats_timestamp (which tracks timestamp of the last write for a
DB) was random - that's where the bizarre year values came from.
I've added a proper initialization (to 0), and now it works as expected.
Although the whole sequence of errors I was getting was this:
LOG: last_statwrite 11133-08-28 19:22:31.711744+02 is later than
collector's time 2013-02-04 00:54:21.113439+01 for db 19093
WARNING: pgstat wait timeout
LOG: last_statwrite 39681-12-23 18:48:48.9093+01 is later than
collector's time 2013-02-04 00:54:31.424681+01 for db 46494
FATAL: could not find block containing chunk 0x2af4a60
LOG: statistics collector process (PID 10063) exited with exit code 1
WARNING: pgstat wait timeout
WARNING: pgstat wait timeout
I'm not entirely sure where the FATAL came from, but it seems it was
somehow related to the issue - it was quite reproducible, although I
don't see how exactly could this happen. There relevant block of code
looks like this:
char *writetime;
char *mytime;
/* Copy because timestamptz_to_str returns a static buffer */
writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
mytime = pstrdup(timestamptz_to_str(cur_ts));
elog(LOG, "last_statwrite %s is later than collector's time %s for "
"db %d", writetime, mytime, dbentry->databaseid);
pfree(writetime);
pfree(mytime);
which seems quite fine to mee. I'm not sure how one of the pfree calls
could fail?
Anyway, attached is a patch that fixes all three issues, i.e.
1) the un-initialized timestamp
2) the "void" ommited from the signature
3) rename to "pg_stat" instead of just "stat"
Tomas
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index d318db9..6d0efe9 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -64,10 +64,14 @@
/* ----------
* Paths for the statistics files (relative to installation's $PGDATA).
+ * Permanent and temprorary, global and per-database files.
* ----------
*/
-#define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat"
-#define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp"
+#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
+#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
+#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
+#define PGSTAT_STAT_PERMANENT_DB_FILENAME "pg_stat/%d.stat"
+#define PGSTAT_STAT_PERMANENT_DB_TMPFILE "pg_stat/%d.tmp"
/* ----------
* Timer definitions.
@@ -115,8 +119,11 @@ int pgstat_track_activity_query_size = 1024;
* Built from GUC parameter
* ----------
*/
+char *pgstat_stat_directory = NULL;
char *pgstat_stat_filename = NULL;
char *pgstat_stat_tmpname = NULL;
+char *pgstat_stat_db_filename = NULL;
+char *pgstat_stat_db_tmpname = NULL;
/*
* BgWriter global statistics counters (unused in other processes).
@@ -219,11 +226,16 @@ static int localNumBackends = 0;
*/
static PgStat_GlobalStats globalStats;
-/* Last time the collector successfully wrote the stats file */
-static TimestampTz last_statwrite;
+/* Write request info for each database */
+typedef struct DBWriteRequest
+{
+ Oid databaseid; /* OID of the database to write */
+ TimestampTz request_time; /* timestamp of the last write request */
+} DBWriteRequest;
-/* Latest statistics request time from backends */
-static TimestampTz last_statrequest;
+/* Latest statistics request time from backends for each DB */
+static DBWriteRequest * last_statrequests = NULL;
+static int num_statrequests = 0;
static volatile bool need_exit = false;
static volatile bool got_SIGHUP = false;
@@ -252,11 +264,17 @@ static void pgstat_sighup_handler(SIGNAL_ARGS);
static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
Oid tableoid, bool create);
-static void pgstat_write_statsfile(bool permanent);
-static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
+static void pgstat_write_statsfile(bool permanent, bool force);
+static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent);
+static void pgstat_write_db_dummyfile(Oid databaseid);
+static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs);
+static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
static void backend_read_statsfile(void);
static void pgstat_read_current_status(void);
+static bool pgstat_write_statsfile_needed(void);
+static bool pgstat_db_requested(Oid databaseid);
+
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static HTAB *pgstat_collect_oids(Oid catalogid);
@@ -285,7 +303,6 @@ static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
-
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
@@ -549,8 +566,34 @@ startup_failed:
void
pgstat_reset_all(void)
{
- unlink(pgstat_stat_filename);
- unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+ DIR * dir;
+ struct dirent * entry;
+
+ dir = AllocateDir(pgstat_stat_directory);
+ while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL)
+ {
+ char fname[strlen(pgstat_stat_directory) + strlen(entry->d_name) + 1];
+
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ sprintf(fname, "%s/%s", pgstat_stat_directory, entry->d_name);
+ unlink(fname);
+ }
+ FreeDir(dir);
+
+ dir = AllocateDir(PGSTAT_STAT_PERMANENT_DIRECTORY);
+ while ((entry = ReadDir(dir, PGSTAT_STAT_PERMANENT_DIRECTORY)) != NULL)
+ {
+ char fname[strlen(PGSTAT_STAT_PERMANENT_FILENAME) + strlen(entry->d_name) + 1];
+
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ sprintf(fname, "%s/%s", PGSTAT_STAT_PERMANENT_FILENAME, entry->d_name);
+ unlink(fname);
+ }
+ FreeDir(dir);
}
#ifdef EXEC_BACKEND
@@ -1408,13 +1451,14 @@ pgstat_ping(void)
* ----------
*/
static void
-pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time)
+pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
{
PgStat_MsgInquiry msg;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
msg.clock_time = clock_time;
msg.cutoff_time = cutoff_time;
+ msg.databaseid = databaseid;
pgstat_send(&msg, sizeof(msg));
}
@@ -3004,6 +3048,7 @@ PgstatCollectorMain(int argc, char *argv[])
int len;
PgStat_Msg msg;
int wr;
+ bool first_write = true;
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
@@ -3053,17 +3098,11 @@ PgstatCollectorMain(int argc, char *argv[])
init_ps_display("stats collector process", "", "", "");
/*
- * Arrange to write the initial status file right away
- */
- last_statrequest = GetCurrentTimestamp();
- last_statwrite = last_statrequest - 1;
-
- /*
* Read in an existing statistics stats file or initialize the stats to
- * zero.
+ * zero (read data for all databases, including table/func stats).
*/
pgStatRunningInCollector = true;
- pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
+ pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false);
/*
* Loop to process messages until we get SIGQUIT or detect ungraceful
@@ -3107,10 +3146,14 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Write the stats file if a new request has arrived that is not
- * satisfied by existing file.
+ * satisfied by existing file (force writing all files if it's
+ * the first write after startup).
*/
- if (last_statwrite < last_statrequest)
- pgstat_write_statsfile(false);
+ if (first_write || pgstat_write_statsfile_needed())
+ {
+ pgstat_write_statsfile(false, first_write);
+ first_write = false;
+ }
/*
* Try to receive and process a message. This will not block,
@@ -3269,7 +3312,7 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Save the final stats to reuse at next startup.
*/
- pgstat_write_statsfile(true);
+ pgstat_write_statsfile(true, true);
exit(0);
}
@@ -3349,6 +3392,7 @@ pgstat_get_db_entry(Oid databaseid, bool create)
result->n_block_write_time = 0;
result->stat_reset_timestamp = GetCurrentTimestamp();
+ result->stats_timestamp = 0;
memset(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(Oid);
@@ -3429,23 +3473,25 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
* shutting down only), remove the temporary file so that backends
* starting up under a new postmaster can't read the old data before
* the new collector is ready.
+ *
+ * When the 'force' is false, only the requested databases (listed in
+ * last_statrequests) will be written. If 'force' is true, all databases
+ * will be written (this is used e.g. at shutdown).
* ----------
*/
static void
-pgstat_write_statsfile(bool permanent)
+pgstat_write_statsfile(bool permanent, bool force)
{
HASH_SEQ_STATUS hstat;
- HASH_SEQ_STATUS tstat;
- HASH_SEQ_STATUS fstat;
PgStat_StatDBEntry *dbentry;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatFuncEntry *funcentry;
FILE *fpout;
int32 format_id;
const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
int rc;
+ elog(DEBUG1, "writing statsfile '%s'", statfile);
+
/*
* Open the statistics temp file to write out the current values.
*/
@@ -3484,6 +3530,20 @@ pgstat_write_statsfile(bool permanent)
while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
{
/*
+ * Write our the tables and functions into a separate file, but only
+ * if the database is in the requests or if it's a forced write (then
+ * all the DBs need to be written - e.g. at the shutdown).
+ *
+ * We need to do this before the dbentry write to write the proper
+ * timestamp to the global file.
+ */
+ if (force || pgstat_db_requested(dbentry->databaseid)) {
+ elog(DEBUG1, "writing statsfile for DB %d", dbentry->databaseid);
+ dbentry->stats_timestamp = globalStats.stats_timestamp;
+ pgstat_write_db_statsfile(dbentry, permanent);
+ }
+
+ /*
* Write out the DB entry including the number of live backends. We
* don't write the tables or functions pointers, since they're of no
* use to any other process.
@@ -3493,29 +3553,10 @@ pgstat_write_statsfile(bool permanent)
(void) rc; /* we'll check for error with ferror */
/*
- * Walk through the database's access stats per table.
- */
- hash_seq_init(&tstat, dbentry->tables);
- while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
- {
- fputc('T', fpout);
- rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
- (void) rc; /* we'll check for error with ferror */
- }
-
- /*
- * Walk through the database's function stats table.
- */
- hash_seq_init(&fstat, dbentry->functions);
- while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
- {
- fputc('F', fpout);
- rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
- (void) rc; /* we'll check for error with ferror */
- }
-
- /*
* Mark the end of this DB
+ *
+ * TODO Does using these chars still make sense, when the tables/func
+ * stats are moved to a separate file?
*/
fputc('d', fpout);
}
@@ -3527,6 +3568,28 @@ pgstat_write_statsfile(bool permanent)
*/
fputc('E', fpout);
+ /* In any case, we can just throw away all the db requests, but we need to
+ * write dummy files for databases without a stat entry (it would cause
+ * issues in pgstat_read_db_statsfile_timestamp and pgstat wait timeouts).
+ * This may happend e.g. for shared DB (oid = 0) right after initdb.
+ */
+ if (last_statrequests != NULL)
+ {
+ int i = 0;
+ for (i = 0; i < num_statrequests; i++)
+ {
+ /* Create dummy files for requested databases without a proper
+ * dbentry. It's much easier this way than dealing with multiple
+ * timestamps, possibly existing but not yet written DBs etc. */
+ if (! pgstat_get_db_entry(last_statrequests[i].databaseid, false))
+ pgstat_write_db_dummyfile(last_statrequests[i].databaseid);
+ }
+
+ pfree(last_statrequests);
+ last_statrequests = NULL;
+ num_statrequests = 0;
+ }
+
if (ferror(fpout))
{
ereport(LOG,
@@ -3552,57 +3615,247 @@ pgstat_write_statsfile(bool permanent)
tmpfile, statfile)));
unlink(tmpfile);
}
- else
+
+ if (permanent)
+ unlink(pgstat_stat_filename);
+}
+
+
+/* ----------
+ * pgstat_write_db_statsfile() -
+ *
+ * Tell the news. This writes stats file for a single database.
+ *
+ * If writing to the permanent file (happens when the collector is
+ * shutting down only), remove the temporary file so that backends
+ * starting up under a new postmaster can't read the old data before
+ * the new collector is ready.
+ * ----------
+ */
+static void
+pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent)
+{
+ HASH_SEQ_STATUS tstat;
+ HASH_SEQ_STATUS fstat;
+ PgStat_StatTabEntry *tabentry;
+ PgStat_StatFuncEntry *funcentry;
+ FILE *fpout;
+ int32 format_id;
+ const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_DB_TMPFILE : pgstat_stat_db_tmpname;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename;
+ int rc;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte
+ */
+ char db_tmpfile[strlen(tmpfile) + 11];
+ char db_statfile[strlen(statfile) + 11];
+
+ /*
+ * Append database OID at the end of the basic filename (both for tmp and target file).
+ */
+ snprintf(db_tmpfile, strlen(tmpfile) + 11, tmpfile, dbentry->databaseid);
+ snprintf(db_statfile, strlen(statfile) + 11, statfile, dbentry->databaseid);
+
+ elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+ /*
+ * Open the statistics temp file to write out the current values.
+ */
+ fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+ if (fpout == NULL)
{
- /*
- * Successful write, so update last_statwrite.
- */
- last_statwrite = globalStats.stats_timestamp;
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary statistics file \"%s\": %m",
+ db_tmpfile)));
+ return;
+ }
- /*
- * If there is clock skew between backends and the collector, we could
- * receive a stats request time that's in the future. If so, complain
- * and reset last_statrequest. Resetting ensures that no inquiry
- * message can cause more than one stats file write to occur.
- */
- if (last_statrequest > last_statwrite)
- {
- char *reqtime;
- char *mytime;
-
- /* Copy because timestamptz_to_str returns a static buffer */
- reqtime = pstrdup(timestamptz_to_str(last_statrequest));
- mytime = pstrdup(timestamptz_to_str(last_statwrite));
- elog(LOG, "last_statrequest %s is later than collector's time %s",
- reqtime, mytime);
- pfree(reqtime);
- pfree(mytime);
-
- last_statrequest = last_statwrite;
- }
+ /*
+ * Write the file header --- currently just a format ID.
+ */
+ format_id = PGSTAT_FILE_FORMAT_ID;
+ rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * Write the timestamp.
+ */
+ rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * Walk through the database's access stats per table.
+ */
+ hash_seq_init(&tstat, dbentry->tables);
+ while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
+ {
+ fputc('T', fpout);
+ rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
}
+ /*
+ * Walk through the database's function stats table.
+ */
+ hash_seq_init(&fstat, dbentry->functions);
+ while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
+ {
+ fputc('F', fpout);
+ rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+ }
+
+ /*
+ * No more output to be done. Close the temp file and replace the old
+ * pgstat.stat with it. The ferror() check replaces testing for error
+ * after each individual fputc or fwrite above.
+ */
+ fputc('E', fpout);
+
+ if (ferror(fpout))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write temporary statistics file \"%s\": %m",
+ db_tmpfile)));
+ FreeFile(fpout);
+ unlink(db_tmpfile);
+ }
+ else if (FreeFile(fpout) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not close temporary statistics file \"%s\": %m",
+ db_tmpfile)));
+ unlink(db_tmpfile);
+ }
+ else if (rename(db_tmpfile, db_statfile) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
+ db_tmpfile, db_statfile)));
+ unlink(db_tmpfile);
+ }
+
if (permanent)
- unlink(pgstat_stat_filename);
+ {
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+ pgstat_stat_db_filename, dbentry->databaseid);
+ elog(DEBUG1, "removing temporary stat file '%s'", db_statfile);
+ unlink(db_statfile);
+ }
}
/* ----------
+ * pgstat_write_db_dummyfile() -
+ *
+ * All this does is writing a dummy stat file for databases without dbentry
+ * yet. It basically writes just a file header - format ID and a timestamp.
+ * ----------
+ */
+static void
+pgstat_write_db_dummyfile(Oid databaseid)
+{
+ FILE *fpout;
+ int32 format_id;
+ int rc;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte
+ */
+ char db_tmpfile[strlen(pgstat_stat_db_tmpname) + 11];
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+
+ /*
+ * Append database OID at the end of the basic filename (both for tmp and target file).
+ */
+ snprintf(db_tmpfile, strlen(pgstat_stat_db_tmpname) + 11, pgstat_stat_db_tmpname, databaseid);
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11, pgstat_stat_db_filename, databaseid);
+
+ elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+ /*
+ * Open the statistics temp file to write out the current values.
+ */
+ fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+ if (fpout == NULL)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary statistics file \"%s\": %m",
+ db_tmpfile)));
+ return;
+ }
+
+ /*
+ * Write the file header --- currently just a format ID.
+ */
+ format_id = PGSTAT_FILE_FORMAT_ID;
+ rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * Write the timestamp.
+ */
+ rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * No more output to be done. Close the temp file and replace the old
+ * pgstat.stat with it. The ferror() check replaces testing for error
+ * after each individual fputc or fwrite above.
+ */
+ fputc('E', fpout);
+
+ if (ferror(fpout))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write temporary dummy statistics file \"%s\": %m",
+ db_tmpfile)));
+ FreeFile(fpout);
+ unlink(db_tmpfile);
+ }
+ else if (FreeFile(fpout) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not close temporary dummy statistics file \"%s\": %m",
+ db_tmpfile)));
+ unlink(db_tmpfile);
+ }
+ else if (rename(db_tmpfile, db_statfile) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not rename temporary dummy statistics file \"%s\" to \"%s\": %m",
+ db_tmpfile, db_statfile)));
+ unlink(db_tmpfile);
+ }
+
+}
+
+/* ----------
* pgstat_read_statsfile() -
*
* Reads in an existing statistics collector file and initializes the
* databases' hash table (whose entries point to the tables' hash tables).
+ *
+ * Allows reading only the global stats (at database level), which is just
+ * enough for many purposes (e.g. autovacuum launcher etc.). If this is
+ * sufficient for you, use onlydbs=true.
* ----------
*/
static HTAB *
-pgstat_read_statsfile(Oid onlydb, bool permanent)
+pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatTabEntry tabbuf;
- PgStat_StatFuncEntry funcbuf;
- PgStat_StatFuncEntry *funcentry;
HASHCTL hash_ctl;
HTAB *dbhash;
HTAB *tabhash = NULL;
@@ -3613,6 +3866,11 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
/*
+ * If we want a db-level stats only, we don't want a particular db.
+ */
+ Assert(!((onlydb != InvalidOid) && onlydbs));
+
+ /*
* The tables will live in pgStatLocalContext.
*/
pgstat_setup_memcxt();
@@ -3758,6 +4016,16 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
*/
tabhash = dbentry->tables;
funchash = dbentry->functions;
+
+ /*
+ * Read the data from the file for this database. If there was
+ * onlydb specified (!= InvalidOid), we would not get here because
+ * of a break above. So we don't need to recheck.
+ */
+ if (! onlydbs)
+ pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash,
+ permanent);
+
break;
/*
@@ -3768,6 +4036,105 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
funchash = NULL;
break;
+ case 'E':
+ goto done;
+
+ default:
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"",
+ statfile)));
+ goto done;
+ }
+ }
+
+done:
+ FreeFile(fpin);
+
+ if (permanent)
+ unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+
+ return dbhash;
+}
+
+
+/* ----------
+ * pgstat_read_db_statsfile() -
+ *
+ * Reads in an existing statistics collector db file and initializes the
+ * tables and functions hash tables (for the database identified by Oid).
+ * ----------
+ */
+static void
+pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent)
+{
+ PgStat_StatTabEntry *tabentry;
+ PgStat_StatTabEntry tabbuf;
+ PgStat_StatFuncEntry funcbuf;
+ PgStat_StatFuncEntry *funcentry;
+ FILE *fpin;
+ int32 format_id;
+ TimestampTz timestamp;
+ bool found;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte
+ */
+ char db_statfile[strlen(statfile) + 11];
+
+ /*
+ * Append database OID at the end of the basic filename (both for tmp and target file).
+ */
+ snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
+
+ /*
+ * Try to open the status file. If it doesn't exist, the backends simply
+ * return zero for anything and the collector simply starts from scratch
+ * with empty counters.
+ *
+ * ENOENT is a possibility if the stats collector is not running or has
+ * not yet written the stats file the first time. Any other failure
+ * condition is suspicious.
+ */
+ if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not open statistics file \"%s\": %m",
+ db_statfile)));
+ return;
+ }
+
+ /*
+ * Verify it's of the expected format.
+ */
+ if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
+ || format_id != PGSTAT_FILE_FORMAT_ID)
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"", db_statfile)));
+ goto done;
+ }
+
+ /*
+ * Read global stats struct
+ */
+ if (fread(×tamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp))
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"", db_statfile)));
+ goto done;
+ }
+
+ /*
+ * We found an existing collector stats file. Read it and put all the
+ * hashtable entries into place.
+ */
+ for (;;)
+ {
+ switch (fgetc(fpin))
+ {
/*
* 'T' A PgStat_StatTabEntry follows.
*/
@@ -3777,7 +4144,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
@@ -3795,7 +4162,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
@@ -3811,7 +4178,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
@@ -3829,7 +4196,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
@@ -3845,7 +4212,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
default:
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errmsg("corrupted statistics file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
}
@@ -3854,37 +4221,47 @@ done:
FreeFile(fpin);
if (permanent)
- unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+ {
+ char db_statfile[strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) + 11];
+ snprintf(db_statfile, strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) + 11,
+ PGSTAT_STAT_PERMANENT_DB_FILENAME, databaseid);
+ elog(DEBUG1, "removing permanent stats file '%s'", db_statfile);
+ unlink(db_statfile);
+ }
- return dbhash;
+ return;
}
/* ----------
- * pgstat_read_statsfile_timestamp() -
+ * pgstat_read_db_statsfile_timestamp() -
*
- * Attempt to fetch the timestamp of an existing stats file.
+ * Attempt to fetch the timestamp of an existing stats file (for a DB).
* Returns TRUE if successful (timestamp is stored at *ts).
* ----------
*/
static bool
-pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
+pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz *ts)
{
- PgStat_GlobalStats myGlobalStats;
+ TimestampTz timestamp;
FILE *fpin;
int32 format_id;
- const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename;
+ char db_statfile[strlen(statfile) + 11];
+
+ /* format the db statfile filename */
+ snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
/*
* Try to open the status file. As above, anything but ENOENT is worthy
* of complaining about.
*/
- if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
+ if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
{
if (errno != ENOENT)
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errcode_for_file_access(),
errmsg("could not open statistics file \"%s\": %m",
- statfile)));
+ db_statfile)));
return false;
}
@@ -3895,7 +4272,7 @@ pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
|| format_id != PGSTAT_FILE_FORMAT_ID)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
- (errmsg("corrupted statistics file \"%s\"", statfile)));
+ (errmsg("corrupted statistics file \"%s\"", db_statfile)));
FreeFile(fpin);
return false;
}
@@ -3903,15 +4280,15 @@ pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
/*
* Read global stats struct
*/
- if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats))
+ if (fread(×tamp, 1, sizeof(TimestampTz), fpin) != sizeof(TimestampTz))
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
- (errmsg("corrupted statistics file \"%s\"", statfile)));
+ (errmsg("corrupted statistics file \"%s\"", db_statfile)));
FreeFile(fpin);
return false;
}
- *ts = myGlobalStats.stats_timestamp;
+ *ts = timestamp;
FreeFile(fpin);
return true;
@@ -3947,7 +4324,7 @@ backend_read_statsfile(void)
CHECK_FOR_INTERRUPTS();
- ok = pgstat_read_statsfile_timestamp(false, &file_ts);
+ ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, &file_ts);
cur_ts = GetCurrentTimestamp();
/* Calculate min acceptable timestamp, if we didn't already */
@@ -4006,7 +4383,7 @@ backend_read_statsfile(void)
pfree(mytime);
}
- pgstat_send_inquiry(cur_ts, min_ts);
+ pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
break;
}
@@ -4016,7 +4393,7 @@ backend_read_statsfile(void)
/* Not there or too old, so kick the collector and wait a bit */
if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
- pgstat_send_inquiry(cur_ts, min_ts);
+ pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
}
@@ -4026,9 +4403,16 @@ backend_read_statsfile(void)
/* Autovacuum launcher wants stats about all databases */
if (IsAutoVacuumLauncherProcess())
- pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
+ /*
+ * FIXME Does it really need info including tables/functions? Or is it enough to read
+ * database-level stats? It seems to me the launcher needs PgStat_StatDBEntry only
+ * (at least that's how I understand the rebuild_database_list() in autovacuum.c),
+ * because pgstat_stattabentries are used in do_autovacuum() only, that that's what's
+ * executed in workers ... So maybe we'd be just fine by reading in the dbentries?
+ */
+ pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true);
else
- pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
+ pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, false);
}
@@ -4084,44 +4468,84 @@ pgstat_clear_snapshot(void)
static void
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
{
- /*
- * Advance last_statrequest if this requestor has a newer cutoff time
- * than any previous request.
- */
- if (msg->cutoff_time > last_statrequest)
- last_statrequest = msg->cutoff_time;
+ int i = 0;
+ bool found = false;
+ PgStat_StatDBEntry *dbentry;
+
+ elog(DEBUG1, "received inquiry for %d", msg->databaseid);
/*
- * If the requestor's local clock time is older than last_statwrite, we
- * should suspect a clock glitch, ie system time going backwards; though
- * the more likely explanation is just delayed message receipt. It is
- * worth expending a GetCurrentTimestamp call to be sure, since a large
- * retreat in the system clock reading could otherwise cause us to neglect
- * to update the stats file for a long time.
+ * Find the last write request for this DB (found=true in that case). Plain
+ * linear search, not really worth doing any magic here (probably).
*/
- if (msg->clock_time < last_statwrite)
+ for (i = 0; i < num_statrequests; i++)
+ {
+ if (last_statrequests[i].databaseid == msg->databaseid)
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ {
+ /*
+ * There already is a request for this DB, so lets advance the
+ * request time if this requestor has a newer cutoff time
+ * than any previous request.
+ */
+ if (msg->cutoff_time > last_statrequests[i].request_time)
+ last_statrequests[i].request_time = msg->cutoff_time;
+ }
+ else
{
- TimestampTz cur_ts = GetCurrentTimestamp();
+ /*
+ * There's no request for this DB yet, so lets create it (allocate a
+ * space for it, set the values).
+ */
+ if (last_statrequests == NULL)
+ last_statrequests = palloc(sizeof(DBWriteRequest));
+ else
+ last_statrequests = repalloc(last_statrequests,
+ (num_statrequests + 1)*sizeof(DBWriteRequest));
+
+ last_statrequests[num_statrequests].databaseid = msg->databaseid;
+ last_statrequests[num_statrequests].request_time = msg->clock_time;
+ num_statrequests += 1;
- if (cur_ts < last_statwrite)
+ /*
+ * If the requestor's local clock time is older than last_statwrite, we
+ * should suspect a clock glitch, ie system time going backwards; though
+ * the more likely explanation is just delayed message receipt. It is
+ * worth expending a GetCurrentTimestamp call to be sure, since a large
+ * retreat in the system clock reading could otherwise cause us to neglect
+ * to update the stats file for a long time.
+ */
+ dbentry = pgstat_get_db_entry(msg->databaseid, false);
+ if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp))
{
- /*
- * Sure enough, time went backwards. Force a new stats file write
- * to get back in sync; but first, log a complaint.
- */
- char *writetime;
- char *mytime;
-
- /* Copy because timestamptz_to_str returns a static buffer */
- writetime = pstrdup(timestamptz_to_str(last_statwrite));
- mytime = pstrdup(timestamptz_to_str(cur_ts));
- elog(LOG, "last_statwrite %s is later than collector's time %s",
- writetime, mytime);
- pfree(writetime);
- pfree(mytime);
-
- last_statrequest = cur_ts;
- last_statwrite = last_statrequest - 1;
+ TimestampTz cur_ts = GetCurrentTimestamp();
+
+ if (cur_ts < dbentry->stats_timestamp)
+ {
+ /*
+ * Sure enough, time went backwards. Force a new stats file write
+ * to get back in sync; but first, log a complaint.
+ */
+ char *writetime;
+ char *mytime;
+
+ /* Copy because timestamptz_to_str returns a static buffer */
+ writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
+ mytime = pstrdup(timestamptz_to_str(cur_ts));
+ elog(LOG, "last_statwrite %s is later than collector's time %s for "
+ "db %d", writetime, mytime, dbentry->databaseid);
+ pfree(writetime);
+ pfree(mytime);
+
+ last_statrequests[num_statrequests].request_time = cur_ts;
+ dbentry->stats_timestamp = cur_ts - 1;
+ }
}
}
}
@@ -4278,10 +4702,17 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
/*
- * If found, remove it.
+ * If found, remove it (along with the db statfile).
*/
if (dbentry)
{
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+ pgstat_stat_filename, dbentry->databaseid);
+
+ elog(DEBUG1, "removing %s", db_statfile);
+ unlink(db_statfile);
+
if (dbentry->tables != NULL)
hash_destroy(dbentry->tables);
if (dbentry->functions != NULL)
@@ -4687,3 +5118,58 @@ pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
HASH_REMOVE, NULL);
}
}
+
+/* ----------
+ * pgstat_write_statsfile_needed() -
+ *
+ * Checks whether there's a db stats request, requiring a file write.
+ *
+ * TODO Seems that thanks the way we handle last_statrequests (erase after
+ * a write), this is unnecessary. Just check that there's at least one
+ * request and you're done. Although there might be delayed requests ...
+ * ----------
+ */
+
+static bool pgstat_write_statsfile_needed(void)
+{
+ int i = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* Check the databases if they need to refresh the stats. */
+ for (i = 0; i < num_statrequests; i++)
+ {
+ dbentry = pgstat_get_db_entry(last_statrequests[i].databaseid, false);
+
+ /* No dbentry yet or too old. */
+ if ((! dbentry) ||
+ (dbentry->stats_timestamp < last_statrequests[i].request_time)) {
+ return true;
+ }
+
+ }
+
+ /* Well, everything was written recently ... */
+ return false;
+}
+
+/* ----------
+ * pgstat_write_statsfile_needed() -
+ *
+ * Checks whether stats for a particular DB need to be written to a file).
+ * ----------
+ */
+
+static bool
+pgstat_db_requested(Oid databaseid)
+{
+ int i = 0;
+
+ /* Check the databases if they need to refresh the stats. */
+ for (i = 0; i < num_statrequests; i++)
+ {
+ if (last_statrequests[i].databaseid == databaseid)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b0af9f5..08ef324 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -8709,20 +8709,43 @@ static void
assign_pgstat_temp_directory(const char *newval, void *extra)
{
/* check_canonical_path already canonicalized newval for us */
+ char *dname;
char *tname;
char *fname;
-
- tname = guc_malloc(ERROR, strlen(newval) + 12); /* /pgstat.tmp */
- sprintf(tname, "%s/pgstat.tmp", newval);
- fname = guc_malloc(ERROR, strlen(newval) + 13); /* /pgstat.stat */
- sprintf(fname, "%s/pgstat.stat", newval);
-
+ char *tname_db;
+ char *fname_db;
+
+ /* directory */
+ dname = guc_malloc(ERROR, strlen(newval) + 1); /* runtime dir */
+ sprintf(dname, "%s", newval);
+
+ /* global stats */
+ tname = guc_malloc(ERROR, strlen(newval) + 12); /* /global.tmp */
+ sprintf(tname, "%s/global.tmp", newval);
+ fname = guc_malloc(ERROR, strlen(newval) + 13); /* /global.stat */
+ sprintf(fname, "%s/global.stat", newval);
+
+ /* per-db stats */
+ tname_db = guc_malloc(ERROR, strlen(newval) + 8); /* /%d.tmp */
+ sprintf(tname_db, "%s/%%d.tmp", newval);
+ fname_db = guc_malloc(ERROR, strlen(newval) + 9); /* /%d.stat */
+ sprintf(fname_db, "%s/%%d.stat", newval);
+
+ if (pgstat_stat_directory)
+ free(pgstat_stat_directory);
+ pgstat_stat_directory = dname;
if (pgstat_stat_tmpname)
free(pgstat_stat_tmpname);
pgstat_stat_tmpname = tname;
if (pgstat_stat_filename)
free(pgstat_stat_filename);
pgstat_stat_filename = fname;
+ if (pgstat_stat_db_tmpname)
+ free(pgstat_stat_db_tmpname);
+ pgstat_stat_db_tmpname = tname_db;
+ if (pgstat_stat_db_filename)
+ free(pgstat_stat_db_filename);
+ pgstat_stat_db_filename = fname_db;
}
static bool
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 1bba426..da1e19f 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -192,6 +192,7 @@ const char *subdirs[] = {
"base",
"base/1",
"pg_tblspc",
+ "pg_stat",
"pg_stat_tmp"
};
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 03c0174..d7d4ad9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -205,6 +205,7 @@ typedef struct PgStat_MsgInquiry
PgStat_MsgHdr m_hdr;
TimestampTz clock_time; /* observed local clock time */
TimestampTz cutoff_time; /* minimum acceptable file timestamp */
+ Oid databaseid; /* requested DB (InvalidOid => all DBs) */
} PgStat_MsgInquiry;
@@ -514,7 +515,7 @@ typedef union PgStat_Msg
* ------------------------------------------------------------
*/
-#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9A
+#define PGSTAT_FILE_FORMAT_ID 0xA240CA47
/* ----------
* PgStat_StatDBEntry The collector's data per database
@@ -545,6 +546,7 @@ typedef struct PgStat_StatDBEntry
PgStat_Counter n_block_write_time;
TimestampTz stat_reset_timestamp;
+ TimestampTz stats_timestamp; /* time of db stats file update */
/*
* tables and functions must be last in the struct, because we don't write
@@ -722,8 +724,11 @@ extern bool pgstat_track_activities;
extern bool pgstat_track_counts;
extern int pgstat_track_functions;
extern PGDLLIMPORT int pgstat_track_activity_query_size;
+extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;
+extern char *pgstat_stat_db_tmpname;
+extern char *pgstat_stat_db_filename;
/*
* BgWriter statistics counters are updated directly by bgwriter and bufmgr
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers