Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Have we seen any such failures since the first day they appeared?
> 
> agouti blew up about the same time you typed that, so yes it's still
> a problem.
> 
> http://www.pgbuildfarm.org/cgi-bin/show_log.pl?nm=agouti&dt=2006-05-08%2003:15:01

Delay pgstat file write patch reverted.

-- 
  Bruce Momjian   http://candle.pha.pa.us
  EnterpriseDB    http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/postmaster/pgstat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v
retrieving revision 1.123
retrieving revision 1.124
diff -c -r1.123 -r1.124
*** src/backend/postmaster/pgstat.c     20 Apr 2006 10:51:32 -0000      1.123
--- src/backend/postmaster/pgstat.c     27 Apr 2006 00:06:58 -0000      1.124
***************
***************
*** 28,33 ****
--- 28,34 ----
  #include <arpa/inet.h>
  #include <signal.h>
  #include <time.h>
+ #include <sys/stat.h>
  
  #include "pgstat.h"
  
***************
*** 66,77 ****
   * Timer definitions.
   * ----------
   */
- #define PGSTAT_STAT_INTERVAL  500             /* How often to write the 
status file;
-                                                                               
 * in milliseconds. */
  
! #define PGSTAT_RESTART_INTERVAL 60            /* How often to attempt to 
restart a
!                                                                               
 * failed statistics collector; in
!                                                                               
 * seconds. */
  
  /* ----------
   * Amount of space reserved in pgstat_recvbuffer().
--- 67,81 ----
   * Timer definitions.
   * ----------
   */
  
! /* How often to write the status file, in milliseconds. */
! #define PGSTAT_STAT_INTERVAL  (5*60*1000)
! 
! /*
!  *    How often to attempt to restart a failed statistics collector; in ms.
!  *    Must be at least PGSTAT_STAT_INTERVAL.
!  */
! #define PGSTAT_RESTART_INTERVAL (5*60*1000)
  
  /* ----------
   * Amount of space reserved in pgstat_recvbuffer().
***************
*** 172,182 ****
  static void pgstat_write_statsfile(void);
  static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
                                          PgStat_StatBeEntry **betab,
!                                         int *numbackends);
  static void backend_read_statsfile(void);
  
  static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
  static void pgstat_send(void *msg, int len);
  
  static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
  static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
--- 176,187 ----
  static void pgstat_write_statsfile(void);
  static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
                                          PgStat_StatBeEntry **betab,
!                                         int *numbackends, bool rewrite);
  static void backend_read_statsfile(void);
  
  static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
  static void pgstat_send(void *msg, int len);
+ static void pgstat_send_rewrite(void);
  
  static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
  static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
***************
*** 1449,1454 ****
--- 1454,1477 ----
  #endif
  }
  
+ /*
+  * pgstat_send_rewrite() -
+  *
+  *    Send a command to the collector to rewrite the stats file.
+  * ----------
+  */
+ static void
+ pgstat_send_rewrite(void)
+ {
+     PgStat_MsgRewrite msg;
+ 
+       if (pgStatSock < 0)
+               return;
+ 
+       pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REWRITE);
+       pgstat_send(&msg, sizeof(msg));
+ }
+ 
  
  /* ----------
   * PgstatBufferMain() -
***************
*** 1549,1555 ****
        fd_set          rfds;
        int                     readPipe;
        int                     len = 0;
!       struct itimerval timeout;
        bool            need_timer = false;
  
        MyProcPid = getpid();           /* reset MyProcPid */
--- 1572,1578 ----
        fd_set          rfds;
        int                     readPipe;
        int                     len = 0;
!       struct itimerval timeout, canceltimeout;
        bool            need_timer = false;
  
        MyProcPid = getpid();           /* reset MyProcPid */
***************
*** 1604,1615 ****
        timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
        timeout.it_value.tv_usec = PGSTAT_STAT_INTERVAL % 1000;
  
        /*
         * Read in an existing statistics stats file or initialize the stats to
         * zero.
         */
        pgStatRunningInCollector = true;
!       pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
  
        /*
         * Create the known backends table
--- 1627,1641 ----
        timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
        timeout.it_value.tv_usec = PGSTAT_STAT_INTERVAL % 1000;
  
+       /* Values set to zero will cancel the active timer */
+       MemSet(&canceltimeout, 0, sizeof(struct itimerval));
+ 
        /*
         * Read in an existing statistics stats file or initialize the stats to
         * zero.
         */
        pgStatRunningInCollector = true;
!       pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL, false);
  
        /*
         * Create the known backends table
***************
*** 1764,1769 ****
--- 1790,1801 ----
                                        pgstat_recv_analyze((PgStat_MsgAnalyze 
*) &msg, nread);
                                        break;
  
+                               case PGSTAT_MTYPE_REWRITE:
+                                       need_statwrite = true;
+                                       /* Disable the timer - it will be 
restarted on next data update */
+                                       setitimer(ITIMER_REAL, &canceltimeout, 
NULL);
+                                       break;
+ 
                                default:
                                        break;
                        }
***************
*** 2344,2350 ****
   */
  static void
  pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
!                                         PgStat_StatBeEntry **betab, int 
*numbackends)
  {
        PgStat_StatDBEntry *dbentry;
        PgStat_StatDBEntry dbbuf;
--- 2376,2382 ----
   */
  static void
  pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
!                                         PgStat_StatBeEntry **betab, int 
*numbackends, bool rewrite)
  {
        PgStat_StatDBEntry *dbentry;
        PgStat_StatDBEntry dbbuf;
***************
*** 2363,2368 ****
--- 2395,2465 ----
        MemoryContext use_mcxt;
        int                     mcxt_flags;
  
+ 
+       if (rewrite)
+       {
+               /*
+                * To force a rewrite of the stats file from the collector, send
+                * a REWRITE message to the stats collector. Then wait for the 
file
+                * to change. On Unix, we wait for the inode to change (as the 
file
+                * is renamed into place from a different file). Win32 has no 
concept
+                * of inodes, so we wait for the date on the file to change 
instead.
+                * We can do this on win32 because we have high-res timing on 
the 
+                * file dates, but we can't on unix, because it has 1sec 
resolution
+                * on the fields in struct stat.
+                */
+               int i;
+ #ifndef WIN32
+               struct stat st1, st2;
+ 
+               if (stat(PGSTAT_STAT_FILENAME, &st1))
+               {
+                       /* Assume no file there yet */
+                       st1.st_ino = 0;
+               }
+               st2.st_ino = 0;
+ #else
+               WIN32_FILE_ATTRIBUTE_DATA fd1, fd2;
+ 
+               if (!GetFileAttributesEx(PGSTAT_STAT_FILENAME, 
GetFileExInfoStandard, &fd1))
+               {
+                       fd1.ftLastWriteTime.dwLowDateTime = 0;
+                       fd1.ftLastWriteTime.dwHighDateTime = 0;
+               }
+               fd2.ftLastWriteTime.dwLowDateTime = 0;
+               fd2.ftLastWriteTime.dwHighDateTime = 0;
+ #endif
+ 
+ 
+               /* Send rewrite message */
+               pgstat_send_rewrite();
+ 
+               /* Now wait for the file to change */
+               for (i=0; i < 50; i++)
+               {
+ #ifndef WIN32
+                       if (!stat(PGSTAT_STAT_FILENAME, &st2))
+                       {
+                               if (st2.st_ino != st1.st_ino)
+                                       break;
+                       }
+ #else
+                       if (GetFileAttributesEx(PGSTAT_STAT_FILENAME, 
GetFileExInfoStandard, &fd2))
+                       {
+                               if (fd1.ftLastWriteTime.dwLowDateTime != 
fd2.ftLastWriteTime.dwLowDateTime ||
+                                       fd1.ftLastWriteTime.dwHighDateTime != 
fd2.ftLastWriteTime.dwHighDateTime)
+                                       break;
+                       }
+ #endif
+ 
+                       pg_usleep(50000); 
+               }
+               if (i >= 50)
+                       ereport(WARNING,
+                               (errmsg("pgstat update timeout")));
+               /* Fallthrough and read the old file anyway - old data better 
than no data */
+       }
+ 
        /*
         * If running in the collector or the autovacuum process, we use the
         * DynaHashCxt memory context.  If running in a backend, we use the
***************
*** 2681,2687 ****
                        return;
                Assert(!pgStatRunningInCollector);
                pgstat_read_statsfile(&pgStatDBHash, InvalidOid,
!                                                         &pgStatBeTable, 
&pgStatNumBackends);
        }
        else
        {
--- 2778,2784 ----
                        return;
                Assert(!pgStatRunningInCollector);
                pgstat_read_statsfile(&pgStatDBHash, InvalidOid,
!                                                         &pgStatBeTable, 
&pgStatNumBackends, true);
        }
        else
        {
***************
*** 2691,2697 ****
                {
                        Assert(!pgStatRunningInCollector);
                        pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
!                                                                 
&pgStatBeTable, &pgStatNumBackends);
                        pgStatDBHashXact = topXid;
                }
        }
--- 2788,2794 ----
                {
                        Assert(!pgStatRunningInCollector);
                        pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
!                                                                 
&pgStatBeTable, &pgStatNumBackends, true);
                        pgStatDBHashXact = topXid;
                }
        }
Index: src/include/pgstat.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/pgstat.h,v
retrieving revision 1.43
retrieving revision 1.44
diff -c -r1.43 -r1.44
*** src/include/pgstat.h        6 Apr 2006 20:38:00 -0000       1.43
--- src/include/pgstat.h        27 Apr 2006 00:06:59 -0000      1.44
***************
*** 32,38 ****
        PGSTAT_MTYPE_RESETCOUNTER,
        PGSTAT_MTYPE_AUTOVAC_START,
        PGSTAT_MTYPE_VACUUM,
!       PGSTAT_MTYPE_ANALYZE
  } StatMsgType;
  
  /* ----------
--- 32,39 ----
        PGSTAT_MTYPE_RESETCOUNTER,
        PGSTAT_MTYPE_AUTOVAC_START,
        PGSTAT_MTYPE_VACUUM,
!       PGSTAT_MTYPE_ANALYZE,
!       PGSTAT_MTYPE_REWRITE
  } StatMsgType;
  
  /* ----------
***************
*** 108,113 ****
--- 109,123 ----
  } PgStat_MsgDummy;
  
  /* ----------
+  * PgStat_MsgRewrite            Sent by backends to cause a rewrite of the 
stats file
+  * ----------
+  */
+ typedef struct Pgstat_MsgRewrite
+ {
+       PgStat_MsgHdr m_hdr;
+ } PgStat_MsgRewrite;
+ 
+ /* ----------
   * PgStat_MsgBestart                  Sent by the backend on startup
   * ----------
   */
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to