On 3.1.2013 20:33, Magnus Hagander wrote:
> On Thu, Jan 3, 2013 at 8:31 PM, Tomas Vondra <[email protected]> wrote:
>> On 3.1.2013 18:47, Heikki Linnakangas wrote:
>>> How about creating the new directory as a direct subdir of $PGDATA,
>>> rather than buried in global? "global" is supposed to contain data
>>> related to shared catalog relations (plus pg_control), so it doesn't
>>> seem like the right location for per-database stat files. Also, if we're
>>> going to have admins manually zapping the directory (hopefully when the
>>> system is offline), that's less scary if the directory is not buried as
>>> deep.
>>
>> That's clearly possible and it's a trivial change. I was thinking about
>> that actually, but then I placed the directory into "global" because
>> that's where the "pgstat.stat" originally was.
>
> Yeah, +1 for a separate directory not in global.
OK, I moved the files from "global/stat" to "stat".
Tomas
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index be3adf1..4ec485e 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -64,10 +64,14 @@
/* ----------
* Paths for the statistics files (relative to installation's $PGDATA).
+ * Permanent and temporary, global and per-database files (%u = database OID).
* ----------
*/
-#define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat"
-#define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp"
+#define PGSTAT_STAT_PERMANENT_DIRECTORY "stat"
+#define PGSTAT_STAT_PERMANENT_FILENAME "stat/global.stat"
+#define PGSTAT_STAT_PERMANENT_TMPFILE "stat/global.tmp"
+#define PGSTAT_STAT_PERMANENT_DB_FILENAME "stat/%u.stat"
+#define PGSTAT_STAT_PERMANENT_DB_TMPFILE "stat/%u.tmp"
/* ----------
* Timer definitions.
@@ -115,8 +119,11 @@ int
pgstat_track_activity_query_size = 1024;
* Built from GUC parameter
* ----------
*/
+char *pgstat_stat_directory = NULL;
char *pgstat_stat_filename = NULL;
char *pgstat_stat_tmpname = NULL;
+char *pgstat_stat_db_filename = NULL;
+char *pgstat_stat_db_tmpname = NULL;
/*
* BgWriter global statistics counters (unused in other processes).
@@ -219,11 +226,16 @@ static int localNumBackends = 0;
*/
static PgStat_GlobalStats globalStats;
-/* Last time the collector successfully wrote the stats file */
-static TimestampTz last_statwrite;
+/* Write request info for each database (one array entry per requested database) */
+typedef struct DBWriteRequest
+{
+ Oid databaseid; /* OID of the database to write */
+ TimestampTz request_time; /* timestamp of the last write request */
+} DBWriteRequest;
-/* Latest statistics request time from backends */
-static TimestampTz last_statrequest;
+/* Latest statistics request time from backends for each DB */
+static DBWriteRequest * last_statrequests = NULL;
+static int num_statrequests = 0;
static volatile bool need_exit = false;
static volatile bool got_SIGHUP = false;
@@ -252,11 +264,17 @@ static void pgstat_sighup_handler(SIGNAL_ARGS);
static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
Oid tableoid, bool create);
-static void pgstat_write_statsfile(bool permanent);
-static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
+static void pgstat_write_statsfile(bool permanent, bool force);
+static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool
permanent);
+static void pgstat_write_db_dummyfile(Oid databaseid);
+static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs);
+static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB
*funchash, bool permanent);
static void backend_read_statsfile(void);
static void pgstat_read_current_status(void);
+static bool pgstat_write_statsfile_needed(void); /* true if a pending request requires a write */
+static bool pgstat_db_requested(Oid databaseid);
+
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static HTAB *pgstat_collect_oids(Oid catalogid);
@@ -285,7 +303,6 @@ static void
pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
-
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
@@ -549,8 +566,34 @@ startup_failed:
void
pgstat_reset_all(void)
{
- unlink(pgstat_stat_filename);
- unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+ DIR * dir;
+ struct dirent * entry;
+ /* First pass: unlink every file in the temporary stats directory. */
+ dir = AllocateDir(pgstat_stat_directory);
+ while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL)
+ {
+ char fname[strlen(pgstat_stat_directory) + strlen(entry->d_name) + 2];
+ /* fname holds "dir/name": the +2 covers the '/' separator and the NUL */
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ snprintf(fname, sizeof(fname), "%s/%s", pgstat_stat_directory, entry->d_name);
+ unlink(fname);
+ }
+ FreeDir(dir);
+ /* Second pass: same for the permanent stats directory (not the global file name). */
+ dir = AllocateDir(PGSTAT_STAT_PERMANENT_DIRECTORY);
+ while ((entry = ReadDir(dir, PGSTAT_STAT_PERMANENT_DIRECTORY)) != NULL)
+ {
+ char fname[strlen(PGSTAT_STAT_PERMANENT_DIRECTORY) + strlen(entry->d_name) + 2];
+
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ snprintf(fname, sizeof(fname), "%s/%s", PGSTAT_STAT_PERMANENT_DIRECTORY, entry->d_name);
+ unlink(fname);
+ }
+ FreeDir(dir);
}
#ifdef EXEC_BACKEND
@@ -1408,13 +1451,14 @@ pgstat_ping(void)
* ----------
*/
static void
-pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time)
+pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid
databaseid)
{
PgStat_MsgInquiry msg;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
msg.clock_time = clock_time;
msg.cutoff_time = cutoff_time;
+ msg.databaseid = databaseid; /* database the collector is asked to write stats for */
pgstat_send(&msg, sizeof(msg));
}
@@ -3004,6 +3048,7 @@ PgstatCollectorMain(int argc, char *argv[])
int len;
PgStat_Msg msg;
int wr;
+ bool first_write = true;
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
@@ -3053,17 +3098,11 @@ PgstatCollectorMain(int argc, char *argv[])
init_ps_display("stats collector process", "", "", "");
/*
- * Arrange to write the initial status file right away
- */
- last_statrequest = GetCurrentTimestamp();
- last_statwrite = last_statrequest - 1;
-
- /*
* Read in an existing statistics stats file or initialize the stats to
- * zero.
+ * zero (read data for all databases, including table/func stats).
*/
pgStatRunningInCollector = true;
- pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
+ pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false);
/*
* Loop to process messages until we get SIGQUIT or detect ungraceful
@@ -3107,10 +3146,14 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Write the stats file if a new request has arrived
that is not
- * satisfied by existing file.
+ * satisfied by existing file (force writing all files
if it's
+ * the first write after startup).
*/
- if (last_statwrite < last_statrequest)
- pgstat_write_statsfile(false);
+ if (first_write || pgstat_write_statsfile_needed())
+ {
+ pgstat_write_statsfile(false, first_write);
+ first_write = false;
+ }
/*
* Try to receive and process a message. This will not
block,
@@ -3269,7 +3312,7 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Save the final stats to reuse at next startup.
*/
- pgstat_write_statsfile(true);
+ pgstat_write_statsfile(true, true);
exit(0);
}
@@ -3429,23 +3472,25 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid
tableoid, bool create)
* shutting down only), remove the temporary file so that backends
* starting up under a new postmaster can't read the old data before
* the new collector is ready.
+ *
+ * When the 'force' is false, only the requested databases (listed in
+ * last_statrequests) will be written. If 'force' is true, all databases
+ * will be written (this is used e.g. at shutdown).
* ----------
*/
static void
-pgstat_write_statsfile(bool permanent)
+pgstat_write_statsfile(bool permanent, bool force)
{
HASH_SEQ_STATUS hstat;
- HASH_SEQ_STATUS tstat;
- HASH_SEQ_STATUS fstat;
PgStat_StatDBEntry *dbentry;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatFuncEntry *funcentry;
FILE *fpout;
int32 format_id;
const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE :
pgstat_stat_tmpname;
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME :
pgstat_stat_filename;
int rc;
+ elog(DEBUG1, "writing statsfile '%s'", statfile);
+
/*
* Open the statistics temp file to write out the current values.
*/
@@ -3484,6 +3529,20 @@ pgstat_write_statsfile(bool permanent)
while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) !=
NULL)
{
/*
+ * Write out the tables and functions into a separate file, but only
+ * if the database is in the requests or if it's a forced write
(then
+ * all the DBs need to be written - e.g. at the shutdown).
+ *
+ * We need to do this before the dbentry write to write the
proper
+ * timestamp to the global file.
+ */
+ if (force || pgstat_db_requested(dbentry->databaseid)) {
+ elog(DEBUG1, "writing statsfile for DB %d",
dbentry->databaseid);
+ dbentry->stats_timestamp = globalStats.stats_timestamp;
+ pgstat_write_db_statsfile(dbentry, permanent);
+ }
+
+ /*
* Write out the DB entry including the number of live
backends. We
* don't write the tables or functions pointers, since they're
of no
* use to any other process.
@@ -3493,29 +3552,10 @@ pgstat_write_statsfile(bool permanent)
(void) rc; /* we'll check for
error with ferror */
/*
- * Walk through the database's access stats per table.
- */
- hash_seq_init(&tstat, dbentry->tables);
- while ((tabentry = (PgStat_StatTabEntry *)
hash_seq_search(&tstat)) != NULL)
- {
- fputc('T', fpout);
- rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1,
fpout);
- (void) rc; /* we'll check for
error with ferror */
- }
-
- /*
- * Walk through the database's function stats table.
- */
- hash_seq_init(&fstat, dbentry->functions);
- while ((funcentry = (PgStat_StatFuncEntry *)
hash_seq_search(&fstat)) != NULL)
- {
- fputc('F', fpout);
- rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1,
fpout);
- (void) rc; /* we'll check for
error with ferror */
- }
-
- /*
* Mark the end of this DB
+ *
+ * TODO Does using these chars still make sense, when the
tables/func
+ * stats are moved to a separate file?
*/
fputc('d', fpout);
}
@@ -3527,6 +3567,28 @@ pgstat_write_statsfile(bool permanent)
*/
fputc('E', fpout);
+ /* In any case, we can just throw away all the db requests, but we need to
+ * write dummy files for databases without a stat entry (it would cause
+ * issues in pgstat_read_db_statsfile_timestamp and pgstat wait timeouts).
+ * This may happen e.g. for shared DB (oid = 0) right after initdb.
+ */
+ if (last_statrequests != NULL)
+ {
+ int i = 0;
+ for (i = 0; i < num_statrequests; i++)
+ {
+ /* Create dummy files for requested databases without a
proper
+ * dbentry. It's much easier this way than dealing with
multiple
+ * timestamps, possibly existing but not yet written
DBs etc. */
+ if (!
pgstat_get_db_entry(last_statrequests[i].databaseid, false))
+
pgstat_write_db_dummyfile(last_statrequests[i].databaseid);
+ }
+
+ pfree(last_statrequests);
+ last_statrequests = NULL;
+ num_statrequests = 0;
+ }
+
if (ferror(fpout))
{
ereport(LOG,
@@ -3552,57 +3614,247 @@ pgstat_write_statsfile(bool permanent)
tmpfile, statfile)));
unlink(tmpfile);
}
- else
+
+ if (permanent)
+ unlink(pgstat_stat_filename);
+}
+
+
+/* ----------
+ * pgstat_write_db_statsfile() -
+ *
+ * Tell the news. This writes stats file for a single database.
+ *
+ * If writing to the permanent file (happens when the collector is
+ * shutting down only), remove the temporary file so that backends
+ * starting up under a new postmaster can't read the old data before
+ * the new collector is ready.
+ * ----------
+ */
+static void
+pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent)
+{
+ HASH_SEQ_STATUS tstat;
+ HASH_SEQ_STATUS fstat;
+ PgStat_StatTabEntry *tabentry;
+ PgStat_StatFuncEntry *funcentry;
+ FILE *fpout;
+ int32 format_id;
+ const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_DB_TMPFILE :
pgstat_stat_db_tmpname;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME :
pgstat_stat_db_filename;
+ int rc;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0
byte
+ */
+ char db_tmpfile[strlen(tmpfile) + 11];
+ char db_statfile[strlen(statfile) + 11];
+
+ /*
+ * Append database OID at the end of the basic filename (both for tmp
and target file).
+ */
+ snprintf(db_tmpfile, strlen(tmpfile) + 11, tmpfile,
dbentry->databaseid);
+ snprintf(db_statfile, strlen(statfile) + 11, statfile,
dbentry->databaseid);
+
+ elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+ /*
+ * Open the statistics temp file to write out the current values.
+ */
+ fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+ if (fpout == NULL)
{
- /*
- * Successful write, so update last_statwrite.
- */
- last_statwrite = globalStats.stats_timestamp;
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary statistics
file \"%s\": %m",
+ db_tmpfile)));
+ return;
+ }
- /*
- * If there is clock skew between backends and the collector,
we could
- * receive a stats request time that's in the future. If so,
complain
- * and reset last_statrequest. Resetting ensures that no
inquiry
- * message can cause more than one stats file write to occur.
- */
- if (last_statrequest > last_statwrite)
- {
- char *reqtime;
- char *mytime;
-
- /* Copy because timestamptz_to_str returns a static
buffer */
- reqtime = pstrdup(timestamptz_to_str(last_statrequest));
- mytime = pstrdup(timestamptz_to_str(last_statwrite));
- elog(LOG, "last_statrequest %s is later than
collector's time %s",
- reqtime, mytime);
- pfree(reqtime);
- pfree(mytime);
-
- last_statrequest = last_statwrite;
- }
+ /*
+ * Write the file header --- currently just a format ID.
+ */
+ format_id = PGSTAT_FILE_FORMAT_ID;
+ rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ (void) rc; /* we'll check for
error with ferror */
+
+ /*
+ * Write the timestamp.
+ */
+ rc = fwrite(&(globalStats.stats_timestamp),
sizeof(globalStats.stats_timestamp), 1, fpout);
+ (void) rc; /* we'll check for
error with ferror */
+
+ /*
+ * Walk through the database's access stats per table.
+ */
+ hash_seq_init(&tstat, dbentry->tables);
+ while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) !=
NULL)
+ {
+ fputc('T', fpout);
+ rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
+ (void) rc; /* we'll check for error with
ferror */
}
+ /*
+ * Walk through the database's function stats table.
+ */
+ hash_seq_init(&fstat, dbentry->functions);
+ while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat))
!= NULL)
+ {
+ fputc('F', fpout);
+ rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
+ (void) rc; /* we'll check for error with
ferror */
+ }
+
+ /*
+ * No more output to be done. Close the temp file and replace the old
+ * pgstat.stat with it. The ferror() check replaces testing for error
+ * after each individual fputc or fwrite above.
+ */
+ fputc('E', fpout);
+
+ if (ferror(fpout))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write temporary statistics file
\"%s\": %m",
+ db_tmpfile)));
+ FreeFile(fpout);
+ unlink(db_tmpfile);
+ }
+ else if (FreeFile(fpout) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not close temporary statistics file
\"%s\": %m",
+ db_tmpfile)));
+ unlink(db_tmpfile);
+ }
+ else if (rename(db_tmpfile, db_statfile) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not rename temporary statistics
file \"%s\" to \"%s\": %m",
+ db_tmpfile, db_statfile)));
+ unlink(db_tmpfile);
+ }
+
if (permanent)
- unlink(pgstat_stat_filename);
+ {
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+ pgstat_stat_db_filename, dbentry->databaseid);
+ elog(DEBUG1, "removing temporary stat file '%s'", db_statfile);
+ unlink(db_statfile);
+ }
}
/* ----------
+ * pgstat_write_db_dummyfile() -
+ *
+ * Writes a dummy stat file for a database that has no dbentry yet: just the file
+ * header (format ID and globalStats.stats_timestamp) followed by the 'E' end marker.
+ * ----------
+ */
+static void
+pgstat_write_db_dummyfile(Oid databaseid)
+{
+ FILE *fpout;
+ int32 format_id;
+ int rc;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte
+ */
+ char db_tmpfile[strlen(pgstat_stat_db_tmpname) + 11];
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+
+ /*
+ * Substitute the database OID into the filename patterns (both for tmp and target file).
+ */
+ snprintf(db_tmpfile, strlen(pgstat_stat_db_tmpname) + 11,
pgstat_stat_db_tmpname, databaseid);
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
pgstat_stat_db_filename, databaseid);
+
+ elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+ /*
+ * Open the per-database temp file; on failure, log and give up on this database.
+ */
+ fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+ if (fpout == NULL)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary statistics
file \"%s\": %m",
+ db_tmpfile)));
+ return;
+ }
+
+ /*
+ * Write the file header --- currently just a format ID.
+ */
+ format_id = PGSTAT_FILE_FORMAT_ID;
+ rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * Write the timestamp.
+ */
+ rc = fwrite(&(globalStats.stats_timestamp),
sizeof(globalStats.stats_timestamp), 1, fpout);
+ (void) rc; /* we'll check for error with ferror */
+
+ /*
+ * No more output to be done. Close the temp file and rename it to the
+ * per-database stats file name. The ferror() check replaces testing for error
+ * after each individual fputc or fwrite above.
+ */
+ fputc('E', fpout);
+
+ if (ferror(fpout))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write temporary dummy statistics
file \"%s\": %m",
+ db_tmpfile)));
+ FreeFile(fpout);
+ unlink(db_tmpfile);
+ }
+ else if (FreeFile(fpout) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not close temporary dummy statistics
file \"%s\": %m",
+ db_tmpfile)));
+ unlink(db_tmpfile);
+ }
+ else if (rename(db_tmpfile, db_statfile) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not rename temporary dummy
statistics file \"%s\" to \"%s\": %m",
+ db_tmpfile, db_statfile)));
+ unlink(db_tmpfile);
+ }
+
+}
+
+/* ----------
* pgstat_read_statsfile() -
*
* Reads in an existing statistics collector file and initializes the
* databases' hash table (whose entries point to the tables' hash tables).
+ *
+ * Allows reading only the global stats (at database level), which is just
+ * enough for many purposes (e.g. autovacuum launcher etc.). If this is
+ * sufficient for you, use onlydbs=true.
* ----------
*/
static HTAB *
-pgstat_read_statsfile(Oid onlydb, bool permanent)
+pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
- PgStat_StatTabEntry *tabentry;
- PgStat_StatTabEntry tabbuf;
- PgStat_StatFuncEntry funcbuf;
- PgStat_StatFuncEntry *funcentry;
HASHCTL hash_ctl;
HTAB *dbhash;
HTAB *tabhash = NULL;
@@ -3613,6 +3865,11 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME :
pgstat_stat_filename;
/*
+ * If we want a db-level stats only, we don't want a particular db.
+ */
+ Assert(!((onlydb != InvalidOid) && onlydbs));
+
+ /*
* The tables will live in pgStatLocalContext.
*/
pgstat_setup_memcxt();
@@ -3758,6 +4015,16 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
*/
tabhash = dbentry->tables;
funchash = dbentry->functions;
+
+ /*
+ * Read the data from the file for this
database. If there was
+ * onlydb specified (!= InvalidOid), we would
not get here because
+ * of a break above. So we don't need to
recheck.
+ */
+ if (! onlydbs)
+
pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash,
+
permanent);
+
break;
/*
@@ -3768,6 +4035,105 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
funchash = NULL;
break;
+ case 'E':
+ goto done;
+
+ default:
+ ereport(pgStatRunningInCollector ? LOG :
WARNING,
+ (errmsg("corrupted statistics
file \"%s\"",
+ statfile)));
+ goto done;
+ }
+ }
+
+done:
+ FreeFile(fpin);
+
+ if (permanent)
+ unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+
+ return dbhash;
+}
+
+
+/* ----------
+ * pgstat_read_db_statsfile() -
+ *
+ * Reads in an existing statistics collector db file and initializes the
+ * tables and functions hash tables (for the database identified by Oid).
+ * ----------
+ */
+static void
+pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool
permanent)
+{
+ PgStat_StatTabEntry *tabentry;
+ PgStat_StatTabEntry tabbuf;
+ PgStat_StatFuncEntry funcbuf;
+ PgStat_StatFuncEntry *funcentry;
+ FILE *fpin;
+ int32 format_id;
+ TimestampTz timestamp;
+ bool found;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME :
pgstat_stat_db_filename;
+
+ /*
+ * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0
byte
+ */
+ char db_statfile[strlen(statfile) + 11];
+
+ /*
+ * Append database OID at the end of the basic filename (both for tmp
and target file).
+ */
+ snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
+
+ /*
+ * Try to open the status file. If it doesn't exist, the backends simply
+ * return zero for anything and the collector simply starts from scratch
+ * with empty counters.
+ *
+ * ENOENT is a possibility if the stats collector is not running or has
+ * not yet written the stats file the first time. Any other failure
+ * condition is suspicious.
+ */
+ if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not open statistics file
\"%s\": %m",
+ db_statfile)));
+ return;
+ }
+
+ /*
+ * Verify it's of the expected format.
+ */
+ if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
+ || format_id != PGSTAT_FILE_FORMAT_ID)
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"",
db_statfile)));
+ goto done;
+ }
+
+ /*
+ * Read the timestamp that follows the format ID
+ */
+ if (fread(&timestamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp))
+ {
+ ereport(pgStatRunningInCollector ? LOG : WARNING,
+ (errmsg("corrupted statistics file \"%s\"",
db_statfile)));
+ goto done;
+ }
+
+ /*
+ * We found an existing collector stats file. Read it and put all the
+ * hashtable entries into place.
+ */
+ for (;;)
+ {
+ switch (fgetc(fpin))
+ {
/*
* 'T' A PgStat_StatTabEntry follows.
*/
@@ -3777,7 +4143,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG
: WARNING,
(errmsg("corrupted
statistics file \"%s\"",
-
statfile)));
+
db_statfile)));
goto done;
}
@@ -3795,7 +4161,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG
: WARNING,
(errmsg("corrupted
statistics file \"%s\"",
-
statfile)));
+
db_statfile)));
goto done;
}
@@ -3811,7 +4177,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG
: WARNING,
(errmsg("corrupted
statistics file \"%s\"",
-
statfile)));
+
db_statfile)));
goto done;
}
@@ -3829,7 +4195,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
{
ereport(pgStatRunningInCollector ? LOG
: WARNING,
(errmsg("corrupted
statistics file \"%s\"",
-
statfile)));
+
db_statfile)));
goto done;
}
@@ -3845,7 +4211,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
default:
ereport(pgStatRunningInCollector ? LOG :
WARNING,
(errmsg("corrupted statistics
file \"%s\"",
- statfile)));
+ db_statfile)));
goto done;
}
}
@@ -3854,37 +4220,47 @@ done:
FreeFile(fpin);
if (permanent)
- unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+ {
+ char db_statfile[strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) +
11];
+ snprintf(db_statfile, strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME)
+ 11,
+ PGSTAT_STAT_PERMANENT_DB_FILENAME, databaseid);
+ elog(DEBUG1, "removing permanent stats file '%s'", db_statfile);
+ unlink(db_statfile);
+ }
- return dbhash;
+ return;
}
/* ----------
- * pgstat_read_statsfile_timestamp() -
+ * pgstat_read_db_statsfile_timestamp() -
*
- * Attempt to fetch the timestamp of an existing stats file.
+ * Attempt to fetch the timestamp of an existing stats file (for a DB).
* Returns TRUE if successful (timestamp is stored at *ts).
* ----------
*/
static bool
-pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
+pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz
*ts)
{
- PgStat_GlobalStats myGlobalStats;
+ TimestampTz timestamp;
FILE *fpin;
int32 format_id;
- const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME :
pgstat_stat_filename;
+ const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME :
pgstat_stat_db_filename;
+ char db_statfile[strlen(statfile) + 11];
+
+ /* format the db statfile filename */
+ snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
/*
* Try to open the status file. As above, anything but ENOENT is worthy
* of complaining about.
*/
- if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
+ if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
{
if (errno != ENOENT)
ereport(pgStatRunningInCollector ? LOG : WARNING,
(errcode_for_file_access(),
errmsg("could not open statistics file
\"%s\": %m",
- statfile)));
+ db_statfile)));
return false;
}
@@ -3895,7 +4271,7 @@ pgstat_read_statsfile_timestamp(bool permanent,
TimestampTz *ts)
|| format_id != PGSTAT_FILE_FORMAT_ID)
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
- (errmsg("corrupted statistics file \"%s\"",
statfile)));
+ (errmsg("corrupted statistics file \"%s\"",
db_statfile)));
FreeFile(fpin);
return false;
}
@@ -3903,15 +4279,15 @@ pgstat_read_statsfile_timestamp(bool permanent,
TimestampTz *ts)
/*
* Read global stats struct
*/
- if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) !=
sizeof(myGlobalStats))
+ if (fread(&timestamp, 1, sizeof(TimestampTz), fpin) != sizeof(TimestampTz))
{
ereport(pgStatRunningInCollector ? LOG : WARNING,
- (errmsg("corrupted statistics file \"%s\"",
statfile)));
+ (errmsg("corrupted statistics file \"%s\"",
db_statfile)));
FreeFile(fpin);
return false;
}
- *ts = myGlobalStats.stats_timestamp;
+ *ts = timestamp;
FreeFile(fpin);
return true;
@@ -3947,7 +4323,7 @@ backend_read_statsfile(void)
CHECK_FOR_INTERRUPTS();
- ok = pgstat_read_statsfile_timestamp(false, &file_ts);
+ ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false,
&file_ts);
cur_ts = GetCurrentTimestamp();
/* Calculate min acceptable timestamp, if we didn't already */
@@ -4006,7 +4382,7 @@ backend_read_statsfile(void)
pfree(mytime);
}
- pgstat_send_inquiry(cur_ts, min_ts);
+ pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
break;
}
@@ -4016,7 +4392,7 @@ backend_read_statsfile(void)
/* Not there or too old, so kick the collector and wait a bit */
if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
- pgstat_send_inquiry(cur_ts, min_ts);
+ pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
}
@@ -4026,9 +4402,16 @@ backend_read_statsfile(void)
/* Autovacuum launcher wants stats about all databases */
if (IsAutoVacuumLauncherProcess())
- pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
+ /*
+ * FIXME Does it really need info including tables/functions? Or is it enough to read
+ * database-level stats? It seems to me the launcher needs PgStat_StatDBEntry only
+ * (at least that's how I understand the rebuild_database_list() in autovacuum.c),
+ * because pgstat_stattabentries are used in do_autovacuum() only, and that's what's
+ * executed in workers ... So maybe we'd be just fine by reading in the dbentries?
+ */
+ pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true);
else
- pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
+ pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false,
false);
}
@@ -4084,44 +4467,84 @@ pgstat_clear_snapshot(void)
static void
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
{
- /*
- * Advance last_statrequest if this requestor has a newer cutoff time
- * than any previous request.
- */
- if (msg->cutoff_time > last_statrequest)
- last_statrequest = msg->cutoff_time;
+ int i = 0;
+ bool found = false;
+ PgStat_StatDBEntry *dbentry;
+
+ elog(DEBUG1, "received inquiry for %u", msg->databaseid);
/*
- * If the requestor's local clock time is older than last_statwrite, we
- * should suspect a clock glitch, ie system time going backwards; though
- * the more likely explanation is just delayed message receipt. It is
- * worth expending a GetCurrentTimestamp call to be sure, since a large
- * retreat in the system clock reading could otherwise cause us to neglect
- * to update the stats file for a long time.
+ * Find the pending write request for this DB (found=true in that case, and
+ * i is its index). Plain linear search, not really worth doing any magic here (probably).
*/
- if (msg->clock_time < last_statwrite)
+ for (i = 0; i < num_statrequests; i++)
+ {
+ if (last_statrequests[i].databaseid == msg->databaseid)
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ {
+ /*
+ * There already is a request for this DB, so lets advance the
+ * request time if this requestor has a newer cutoff time
+ * than any previous request.
+ */
+ if (msg->cutoff_time > last_statrequests[i].request_time)
+ last_statrequests[i].request_time = msg->cutoff_time;
+ }
+ else
{
- TimestampTz cur_ts = GetCurrentTimestamp();
+ /*
+ * There's no request for this DB yet, so lets create it (grow the
+ * array by one entry and fill in the new last element).
+ */
+ if (last_statrequests == NULL)
+ last_statrequests = palloc(sizeof(DBWriteRequest));
+ else
+ last_statrequests = repalloc(last_statrequests,
+ (num_statrequests + 1)*sizeof(DBWriteRequest));
+
+ last_statrequests[num_statrequests].databaseid = msg->databaseid;
+ last_statrequests[num_statrequests].request_time = msg->clock_time;
+ num_statrequests += 1;
- if (cur_ts < last_statwrite)
+ /*
+ * If the requestor's local clock time is older than the database's last
+ * stats write (dbentry->stats_timestamp), we should suspect a clock glitch,
+ * ie system time going backwards; though the more likely explanation is just
+ * delayed message receipt. It is worth expending a GetCurrentTimestamp call
+ * to be sure, since a large retreat in the system clock reading could
+ * otherwise cause us to neglect to update the stats file for a long time.
+ */
+ dbentry = pgstat_get_db_entry(msg->databaseid, false);
+ if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp))
{
- /*
- * Sure enough, time went backwards. Force a new stats file write
- * to get back in sync; but first, log a complaint.
- */
- char *writetime;
- char *mytime;
-
- /* Copy because timestamptz_to_str returns a static buffer */
- writetime = pstrdup(timestamptz_to_str(last_statwrite));
- mytime = pstrdup(timestamptz_to_str(cur_ts));
- elog(LOG, "last_statwrite %s is later than collector's time %s",
- writetime, mytime);
- pfree(writetime);
- pfree(mytime);
-
- last_statrequest = cur_ts;
- last_statwrite = last_statrequest - 1;
+ TimestampTz cur_ts = GetCurrentTimestamp();
+
+ if (cur_ts < dbentry->stats_timestamp)
+ {
+ /*
+ * Sure enough, time went backwards. Force a new stats file write
+ * to get back in sync; but first, log a complaint.
+ */
+ char *writetime;
+ char *mytime;
+
+ /* Copy because timestamptz_to_str returns a static buffer */
+ writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
+ mytime = pstrdup(timestamptz_to_str(cur_ts));
+ elog(LOG, "last_statwrite %s is later than collector's time %s for "
+ "db %u", writetime, mytime, dbentry->databaseid);
+ pfree(writetime);
+ pfree(mytime);
+ /* The request was appended above, so it lives at index num_statrequests - 1. */
+ last_statrequests[num_statrequests - 1].request_time = cur_ts;
+ dbentry->stats_timestamp = cur_ts - 1;
+ }
}
}
}
@@ -4278,10 +4701,17 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
/*
- * If found, remove it.
+ * If found, remove it (along with the db statfile).
*/
if (dbentry)
{
+ char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+ snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+ pgstat_stat_filename, dbentry->databaseid);
+
+ elog(DEBUG1, "removing %s", db_statfile);
+ unlink(db_statfile);
+
if (dbentry->tables != NULL)
hash_destroy(dbentry->tables);
if (dbentry->functions != NULL)
@@ -4687,3 +5117,58 @@ pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
HASH_REMOVE, NULL);
}
}
+
+/* ----------
+ * pgstat_write_statsfile_needed() -
+ *
+ * Checks whether there's a db stats request, requiring a file write.
+ *
+ * TODO Seems that thanks to the way we handle last_statrequests (erase after
+ * a write), this is unnecessary. Just check that there's at least one
+ * request and you're done. Although there might be delayed requests ...
+ * ----------
+ */
+
+static bool pgstat_write_statsfile_needed()
+{
+ int i = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* Check the databases if they need to refresh the stats. */
+ for (i = 0; i < num_statrequests; i++)
+ {
+ dbentry = pgstat_get_db_entry(last_statrequests[i].databaseid,
false);
+
+ /* No dbentry yet or too old. */
+ if ((! dbentry) ||
+ (dbentry->stats_timestamp <
last_statrequests[i].request_time)) {
+ return true;
+ }
+
+ }
+
+ /* Well, everything was written recently ... */
+ return false;
+}
+
+/* ----------
+ * pgstat_db_requested() -
+ *
+ * Checks whether stats for a particular DB need to be written to a file.
+ * ----------
+ */
+
+static bool
+pgstat_db_requested(Oid databaseid)
+{
+ int i = 0;
+
+ /* Check whether any pending stats request targets this database. */
+ for (i = 0; i < num_statrequests; i++)
+ {
+ if (last_statrequests[i].databaseid == databaseid)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2cf34ce..e3e432b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -8730,20 +8730,43 @@ static void
assign_pgstat_temp_directory(const char *newval, void *extra)
{
/* check_canonical_path already canonicalized newval for us */
+ char *dname;
char *tname;
char *fname;
-
- tname = guc_malloc(ERROR, strlen(newval) + 12); /* /pgstat.tmp
*/
- sprintf(tname, "%s/pgstat.tmp", newval);
- fname = guc_malloc(ERROR, strlen(newval) + 13); /* /pgstat.stat
*/
- sprintf(fname, "%s/pgstat.stat", newval);
-
+ char *tname_db;
+ char *fname_db;
+
+ /* directory */
+ dname = guc_malloc(ERROR, strlen(newval) + 1); /* runtime dir
*/
+ sprintf(dname, "%s", newval);
+
+ /* global stats */
+ tname = guc_malloc(ERROR, strlen(newval) + 12); /* /global.tmp
*/
+ sprintf(tname, "%s/global.tmp", newval);
+ fname = guc_malloc(ERROR, strlen(newval) + 13); /* /global.stat
*/
+ sprintf(fname, "%s/global.stat", newval);
+
+ /* per-db stats */
+ tname_db = guc_malloc(ERROR, strlen(newval) + 8); /*
/%d.tmp */
+ sprintf(tname_db, "%s/%%d.tmp", newval);
+ fname_db = guc_malloc(ERROR, strlen(newval) + 9); /*
/%d.stat */
+ sprintf(fname_db, "%s/%%d.stat", newval);
+
+ if (pgstat_stat_directory)
+ free(pgstat_stat_directory);
+ pgstat_stat_directory = dname;
if (pgstat_stat_tmpname)
free(pgstat_stat_tmpname);
pgstat_stat_tmpname = tname;
if (pgstat_stat_filename)
free(pgstat_stat_filename);
pgstat_stat_filename = fname;
+ if (pgstat_stat_db_tmpname)
+ free(pgstat_stat_db_tmpname);
+ pgstat_stat_db_tmpname = tname_db;
+ if (pgstat_stat_db_filename)
+ free(pgstat_stat_db_filename);
+ pgstat_stat_db_filename = fname_db;
}
static bool
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3e05ac3..a8a2639 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -179,6 +179,7 @@ char *restrict_env;
#endif
const char *subdirs[] = {
"global",
+ "stat",
"pg_xlog",
"pg_xlog/archive_status",
"pg_clog",
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 613c1c2..b3467d2 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -205,6 +205,7 @@ typedef struct PgStat_MsgInquiry
PgStat_MsgHdr m_hdr;
TimestampTz clock_time; /* observed local clock time */
TimestampTz cutoff_time; /* minimum acceptable file timestamp */
+ Oid databaseid; /* requested DB
(InvalidOid => all DBs) */
} PgStat_MsgInquiry;
@@ -514,7 +515,7 @@ typedef union PgStat_Msg
* ------------------------------------------------------------
*/
-#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9A
+#define PGSTAT_FILE_FORMAT_ID 0xA240CA47
/* ----------
* PgStat_StatDBEntry The collector's data per database
@@ -545,6 +546,7 @@ typedef struct PgStat_StatDBEntry
PgStat_Counter n_block_write_time;
TimestampTz stat_reset_timestamp;
+ TimestampTz stats_timestamp; /* time of db stats file update
*/
/*
* tables and functions must be last in the struct, because we don't
write
@@ -722,8 +724,11 @@ extern bool pgstat_track_activities;
extern bool pgstat_track_counts;
extern int pgstat_track_functions;
extern PGDLLIMPORT int pgstat_track_activity_query_size;
+extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;
+extern char *pgstat_stat_db_tmpname;
+extern char *pgstat_stat_db_filename;
/*
* BgWriter statistics counters are updated directly by bgwriter and bufmgr
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers