Tom Lane wrote:
> Bruce Momjian <[email protected]> writes:
> > Have we seen any such failures since the first day they appeared?
>
> agouti blew up about the same time you typed that, so yes it's still
> a problem.
>
> http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=agouti&dt=2006-05-08%2003:15:01
Delay pgstat file write patch reverted.
--
Bruce Momjian http://candle.pha.pa.us
EnterpriseDB http://www.enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Index: src/backend/postmaster/pgstat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v
retrieving revision 1.123
retrieving revision 1.124
diff -c -r1.123 -r1.124
*** src/backend/postmaster/pgstat.c 20 Apr 2006 10:51:32 -0000 1.123
--- src/backend/postmaster/pgstat.c 27 Apr 2006 00:06:58 -0000 1.124
***************
***************
*** 28,33 ****
--- 28,34 ----
#include <arpa/inet.h>
#include <signal.h>
#include <time.h>
+ #include <sys/stat.h>
#include "pgstat.h"
***************
*** 66,77 ****
* Timer definitions.
* ----------
*/
- #define PGSTAT_STAT_INTERVAL 500 /* How often to write the
status file;
-
* in milliseconds. */
! #define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to
restart a
!
* failed statistics collector; in
!
* seconds. */
/* ----------
* Amount of space reserved in pgstat_recvbuffer().
--- 67,81 ----
* Timer definitions.
* ----------
*/
! /* How often to write the status file, in milliseconds. */
! #define PGSTAT_STAT_INTERVAL (5*60*1000)
!
! /*
! * How often to attempt to restart a failed statistics collector; in ms.
! * Must be at least PGSTAT_STAT_INTERVAL.
! */
! #define PGSTAT_RESTART_INTERVAL (5*60*1000)
/* ----------
* Amount of space reserved in pgstat_recvbuffer().
***************
*** 172,182 ****
static void pgstat_write_statsfile(void);
static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
PgStat_StatBeEntry **betab,
! int *numbackends);
static void backend_read_statsfile(void);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
static void pgstat_send(void *msg, int len);
static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
--- 176,187 ----
static void pgstat_write_statsfile(void);
static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
PgStat_StatBeEntry **betab,
! int *numbackends, bool rewrite);
static void backend_read_statsfile(void);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
static void pgstat_send(void *msg, int len);
+ static void pgstat_send_rewrite(void);
static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
***************
*** 1449,1454 ****
--- 1454,1477 ----
#endif
}
+ /* ----------
+ * pgstat_send_rewrite() -
+ * Ask the collector to rewrite the stats file by sending it a REWRITE
+ * message. Does nothing if pgStatSock < 0; callers poll the file for the change.
+ * ----------
+ */
+ static void
+ pgstat_send_rewrite(void)
+ {
+ PgStat_MsgRewrite msg;
+
+ if (pgStatSock < 0)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REWRITE);
+ pgstat_send(&msg, sizeof(msg));
+ }
+
/* ----------
* PgstatBufferMain() -
***************
*** 1549,1555 ****
fd_set rfds;
int readPipe;
int len = 0;
! struct itimerval timeout;
bool need_timer = false;
MyProcPid = getpid(); /* reset MyProcPid */
--- 1572,1578 ----
fd_set rfds;
int readPipe;
int len = 0;
! struct itimerval timeout, canceltimeout;
bool need_timer = false;
MyProcPid = getpid(); /* reset MyProcPid */
***************
*** 1604,1615 ****
timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
timeout.it_value.tv_usec = PGSTAT_STAT_INTERVAL % 1000;
/*
* Read in an existing statistics stats file or initialize the stats to
* zero.
*/
pgStatRunningInCollector = true;
! pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
/*
* Create the known backends table
--- 1627,1641 ----
timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
timeout.it_value.tv_usec = PGSTAT_STAT_INTERVAL % 1000;
+ /* Values set to zero will cancel the active timer */
+ MemSet(&canceltimeout, 0, sizeof(struct itimerval));
+
/*
* Read in an existing statistics stats file or initialize the stats to
* zero.
*/
pgStatRunningInCollector = true;
! pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL, false);
/*
* Create the known backends table
***************
*** 1764,1769 ****
--- 1790,1801 ----
pgstat_recv_analyze((PgStat_MsgAnalyze
*) &msg, nread);
break;
+ case PGSTAT_MTYPE_REWRITE:
+ need_statwrite = true;
+ /* Disable the timer - it will be restarted on the next data update */
+ setitimer(ITIMER_REAL, &canceltimeout,
NULL);
+ break;
+
default:
break;
}
***************
*** 2344,2350 ****
*/
static void
pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
! PgStat_StatBeEntry **betab, int
*numbackends)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
--- 2376,2382 ----
*/
static void
pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
! PgStat_StatBeEntry **betab, int
*numbackends, bool rewrite)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatDBEntry dbbuf;
***************
*** 2363,2368 ****
--- 2395,2465 ----
MemoryContext use_mcxt;
int mcxt_flags;
+
+ if (rewrite)
+ {
+ /*
+ * To force a rewrite of the stats file from the collector, send
+ * a REWRITE message to the stats collector. Then wait for the file
+ * to change. On Unix, we wait for the inode to change (as the file
+ * is renamed into place from a different file). Win32 has no concept
+ * of inodes, so we wait for the date on the file to change instead.
+ * We can do this on win32 because we have high-res timing on the
+ * file dates, but we can't on unix, because it has 1sec resolution
+ * on the fields in struct stat.
+ */
+ */
+ int i;
+ #ifndef WIN32
+ struct stat st1, st2;
+
+ if (stat(PGSTAT_STAT_FILENAME, &st1))
+ {
+ /* Assume no file there yet */
+ st1.st_ino = 0;
+ }
+ st2.st_ino = 0;
+ #else
+ WIN32_FILE_ATTRIBUTE_DATA fd1, fd2;
+
+ if (!GetFileAttributesEx(PGSTAT_STAT_FILENAME,
GetFileExInfoStandard, &fd1))
+ {
+ fd1.ftLastWriteTime.dwLowDateTime = 0;
+ fd1.ftLastWriteTime.dwHighDateTime = 0;
+ }
+ fd2.ftLastWriteTime.dwLowDateTime = 0;
+ fd2.ftLastWriteTime.dwHighDateTime = 0;
+ #endif
+
+
+ /* Send rewrite message */
+ pgstat_send_rewrite();
+
+ /* Now wait for the file to change */
+ for (i=0; i < 50; i++)
+ {
+ #ifndef WIN32
+ if (!stat(PGSTAT_STAT_FILENAME, &st2))
+ {
+ if (st2.st_ino != st1.st_ino)
+ break;
+ }
+ #else
+ if (GetFileAttributesEx(PGSTAT_STAT_FILENAME,
GetFileExInfoStandard, &fd2))
+ {
+ if (fd1.ftLastWriteTime.dwLowDateTime !=
fd2.ftLastWriteTime.dwLowDateTime ||
+ fd1.ftLastWriteTime.dwHighDateTime !=
fd2.ftLastWriteTime.dwHighDateTime)
+ break;
+ }
+ #endif
+
+ pg_usleep(50000);
+ }
+ if (i >= 50)
+ ereport(WARNING,
+ (errmsg("pgstat update timeout")));
+ /* Fall through and read the old file anyway - old data is better than no data */
+ }
+
/*
* If running in the collector or the autovacuum process, we use the
* DynaHashCxt memory context. If running in a backend, we use the
***************
*** 2681,2687 ****
return;
Assert(!pgStatRunningInCollector);
pgstat_read_statsfile(&pgStatDBHash, InvalidOid,
! &pgStatBeTable,
&pgStatNumBackends);
}
else
{
--- 2778,2784 ----
return;
Assert(!pgStatRunningInCollector);
pgstat_read_statsfile(&pgStatDBHash, InvalidOid,
! &pgStatBeTable,
&pgStatNumBackends, true);
}
else
{
***************
*** 2691,2697 ****
{
Assert(!pgStatRunningInCollector);
pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
!
&pgStatBeTable, &pgStatNumBackends);
pgStatDBHashXact = topXid;
}
}
--- 2788,2794 ----
{
Assert(!pgStatRunningInCollector);
pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
!
&pgStatBeTable, &pgStatNumBackends, true);
pgStatDBHashXact = topXid;
}
}
Index: src/include/pgstat.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/pgstat.h,v
retrieving revision 1.43
retrieving revision 1.44
diff -c -r1.43 -r1.44
*** src/include/pgstat.h 6 Apr 2006 20:38:00 -0000 1.43
--- src/include/pgstat.h 27 Apr 2006 00:06:59 -0000 1.44
***************
*** 32,38 ****
PGSTAT_MTYPE_RESETCOUNTER,
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
! PGSTAT_MTYPE_ANALYZE
} StatMsgType;
/* ----------
--- 32,39 ----
PGSTAT_MTYPE_RESETCOUNTER,
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
! PGSTAT_MTYPE_ANALYZE,
! PGSTAT_MTYPE_REWRITE
} StatMsgType;
/* ----------
***************
*** 108,113 ****
--- 109,123 ----
} PgStat_MsgDummy;
/* ----------
+ * PgStat_MsgRewrite Sent by backends to cause a rewrite of the stats file; header only, no payload
+ * ----------
+ */
+ typedef struct Pgstat_MsgRewrite
+ {
+ PgStat_MsgHdr m_hdr;
+ } PgStat_MsgRewrite;
+
+ /* ----------
* PgStat_MsgBestart Sent by the backend on startup
* ----------
*/
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster