Hi,

> On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
> > For 1, I've just finish my work.  The latest patch is available at:
> > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
> >    
> 
> Reminder here--we can't accept code based on it being published to a web 
> page.  You'll need to e-mail it to the pgsql-hackers mailing list to be 
> considered for the next PostgreSQL CommitFest, which is starting in a 
> few weeks.  Code submitted to the mailing list is considered a release 
> of it to the project under the PostgreSQL license, which we can't just 
> assume for things when given only a URL to them.

Sorry about that, but I had enough time to revise my patches this week-end.
I attached the patches in this mail, and will update CommitFest page soon.

> Also, you suggested you were out of time to work on this.  If that's the 
> case, we'd like to know that so we don't keep cc'ing you about things in 
> expectation of an answer.  Someone else may pick this up as a project to 
> continue working on.  But it's going to need a fair amount of revision 
> before it matches what people want here, and I'm not sure how much of 
> what you've written is going to end up in any commit that may happen 
> from this idea.

It seems that I don't have enough time to complete this work.
You don't need to keep cc'ing me, and I'm very happy if postgres to be
the first DBMS which support buffer cache hibernation feature.

Thanks!


diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
index b0e4c41..7a3a207 100644
--- src/backend/access/transam/xlog.c
+++ src/backend/access/transam/xlog.c
@@ -4834,6 +4834,19 @@ ReadControlFile(void)
 #endif
 }
 
+bool
+GetControlFile(ControlFileData *controlFile)
+{
+       if (ControlFile == NULL)
+       {
+               return false;
+       }
+
+       memcpy(controlFile, ControlFile, sizeof(ControlFileData));
+
+       return true;
+}
+
 void
 UpdateControlFile(void)
 {
diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
index fc093cc..7ecf6bb 100644
--- src/backend/bootstrap/bootstrap.c
+++ src/backend/bootstrap/bootstrap.c
@@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
        BaseInit();
 
        /*
+        * Only StartupProcess can call ResumeBufferCacheHibernation() after
+        * InitFileAccess() and smgrinit().
+        */
+       if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
+       {
+               ResumeBufferCacheHibernation();
+       }
+
+       /*
         * When we are an auxiliary process, we aren't going to do the full
         * InitPostgres pushups, but there are a couple of things that need to 
get
         * lit up even in an auxiliary process.
diff --git src/backend/storage/buffer/buf_init.c 
src/backend/storage/buffer/buf_init.c
index dadb49d..52eb51a 100644
--- src/backend/storage/buffer/buf_init.c
+++ src/backend/storage/buffer/buf_init.c
@@ -127,6 +127,14 @@ InitBufferPool(void)
 
        /* Init other shared buffer-management stuff */
        StrategyInitialize(!foundDescs);
+
+       if (BufferCacheHibernationLevel > 0)
+       {
+               
ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
+                       (char *)BufferDescriptors, sizeof(BufferDesc), 
NBuffers);
+               
ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
+                       (char *)BufferBlocks, BLCKSZ, NBuffers);
+       }
 }
 
 /*
diff --git src/backend/storage/buffer/bufmgr.c 
src/backend/storage/buffer/bufmgr.c
index f96685d..dba8ebf 100644
--- src/backend/storage/buffer/bufmgr.c
+++ src/backend/storage/buffer/bufmgr.c
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include <sys/file.h>
+#include <sys/stat.h>
 #include <unistd.h>
 
 #include "catalog/catalog.h"
@@ -61,6 +62,13 @@
 #define BUF_WRITTEN                            0x01
 #define BUF_REUSABLE                   0x02
 
+/*
+ * Buffer Cache Hibernation stuff.
+ */
+/* enable this to debug buffer cache hibernation. */
+#if 0
+#define DEBUG_BUFFER_CACHE_HIBERNATION
+#endif
 
 /* GUC variables */
 bool           zero_damaged_pages = false;
@@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, 
ForkNumber forkNum,
                                }
                        }
 
+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+                       elog(DEBUG5,
+                               "alloc  
[%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+                                       buf->buf_id, buf->flags, 
buf->usage_count, buf->refcount,
+                                       buf->wait_backend_pid, buf->freeNext,
+                                       newHash, newTag.rnode.spcNode,
+                                       newTag.rnode.dbNode, 
newTag.rnode.relNode,
+                                       newTag.forkNum, newTag.blockNum);
+#endif
+
                        return buf;
                }
 
@@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, 
ForkNumber forkNum,
         * the old content is no longer relevant.  (The usage_count starts out 
at
         * 1 so that the buffer can survive one clock-sweep pass.)
         */
+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+       elog(DEBUG5,
+               "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+                       buf->buf_id, buf->flags, buf->usage_count, 
buf->refcount,
+                       buf->wait_backend_pid, buf->freeNext,
+                       oldHash, oldTag.rnode.spcNode,
+                       oldTag.rnode.dbNode, oldTag.rnode.relNode,
+                       oldTag.forkNum, oldTag.blockNum);
+#endif
+
        buf->tag = newTag;
        buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | 
BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
        if (relpersistence == RELPERSISTENCE_PERMANENT)
@@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
                pfree(path);
        }
 }
+
+/* ----------------------------------------------------------------
+ *             Buffer Cache Hibernation support stuff
+ *
+ * Suspend/resume buffer cache data structure using hibernation files
+ * at shutdown/startup.
+ * ----------------------------------------------------------------
+ */
+
+int    BufferCacheHibernationLevel = 0;
+
+#define        BUFFER_CACHE_HIBERNATION_FILE_STRATEGY          
"global/pg_buffer_cache_hibernation_strategy"
+#define        BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS       
"global/pg_buffer_cache_hibernation_descriptors"
+#define        BUFFER_CACHE_HIBERNATION_FILE_BLOCKS            
"global/pg_buffer_cache_hibernation_blocks"
+#define        BUFFER_CACHE_HIBERNATION_FILE_CRC32                     
"global/pg_buffer_cache_hibernation_crc32"
+
+static struct
+{
+       char            *hibernation_file;
+       char            *data_ptr;
+       Size            record_length;  
+       Size            num_records;    
+       pg_crc32        crc;
+} BufferCacheHibernationData[] =
+{
+       /* BufferStrategyControl */
+       {
+               BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
+               NULL, 0, 0, 0
+       },
+
+       /* BufferDescriptors */
+       {
+               BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
+               NULL, 0, 0, 0
+       },
+
+       /* BufferBlocks */
+       {
+               BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
+               NULL, 0, 0, 0
+       },
+
+       /* End-of-list marker */
+       {
+               NULL,
+               NULL, 0, 0, 0
+       },
+};
+
+static ControlFileData controlFile;
+static bool                            controlFileInitialized = false;
+
+/*
+ * AtProcExit_BufferCacheHibernation:
+ *             store the buffer cache into hibernation files at shutdown.
+ */
+static void
+AtProcExit_BufferCacheHibernation(int code, Datum arg)
+{
+       BufferHibernationFileType       id;
+       int                                                     i;
+       int                                                     fd;
+
+       if (BufferCacheHibernationLevel == 0)
+       {
+               return;
+       }
+
+       /*
+        * get the control file to check the system state validation.
+        */
+       if (GetControlFile(&controlFile) == false)
+       {
+               elog(WARNING,
+                       "could not get control file, "
+                       "aborting buffer cache hibernation");
+               return;
+       }
+
+       if (controlFile.state != DB_SHUTDOWNED)
+       {
+               elog(WARNING,
+                       "database system was not shut down normally, "
+                       "aborting buffer cache hibernation");
+               return;
+       }
+
+       /*
+        * suspend buffer cache data structure into hibernation files.
+        */
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               Size            record_length;
+               Size            num_records;
+               char            *ptr;
+               pg_crc32        crc;
+
+               if (BufferCacheHibernationLevel < 2 &&
+                       id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       continue;
+               }
+
+               if (BufferCacheHibernationData[id].data_ptr == NULL ||
+                       BufferCacheHibernationData[id].record_length == 0 ||
+                       BufferCacheHibernationData[id].num_records == 0)
+               {
+                       elog(WARNING,
+                               "ResisterBufferCacheHibernation() was not 
called for %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       goto cleanup;
+               }
+
+               fd = 
BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+                               O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, 
S_IRUSR | S_IWUSR);
+               if (fd < 0)
+               {
+                       elog(WARNING,
+                               "could not open %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       goto cleanup;
+               }
+
+               record_length = BufferCacheHibernationData[id].record_length;
+               num_records = BufferCacheHibernationData[id].num_records;
+
+               elog(NOTICE,
+                       "buffer cache hibernate into %s",
+                       BufferCacheHibernationData[id].hibernation_file);
+
+               INIT_CRC32(crc);
+               for (i = 0; i < num_records; i++)
+               {
+                       ptr = BufferCacheHibernationData[id].data_ptr + (i * 
record_length);
+                       if (write(fd, (void *)ptr, record_length) != 
record_length)
+                       {
+                               elog(WARNING,
+                                       "could not write %s",
+                                       
BufferCacheHibernationData[id].hibernation_file);
+                               goto cleanup;
+                       }
+
+                       COMP_CRC32(crc, ptr, record_length);
+               }
+
+               FIN_CRC32(crc);
+               close(fd);
+
+               BufferCacheHibernationData[id].crc = crc;
+       }
+
+       /*
+        * save the computed crc values for the validations at resuming.
+        */
+       fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+                       O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | 
S_IWUSR);
+       if (fd < 0)
+       {
+               elog(WARNING,
+                       "could not open %s",
+                       BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+               goto cleanup;
+       }
+
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               pg_crc32        crc;
+
+               if (BufferCacheHibernationLevel < 2 &&
+                       id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       continue;
+               }
+
+               crc = BufferCacheHibernationData[id].crc;
+               if (write(fd, (void *)&crc, sizeof(pg_crc32)) != 
sizeof(pg_crc32))
+               {
+                       elog(WARNING,
+                               "could not write %s for %s",
+                               BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       goto cleanup;
+               }
+       }
+       close(fd);
+
+       elog(NOTICE,
+               "buffer cache suspended successfully");
+
+       return;
+
+cleanup:
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               unlink(BufferCacheHibernationData[id].hibernation_file);
+       }
+
+       return;
+}
+
+/*
+ * ResisterBufferCacheHibernation:
+ *             register the buffer cache data structure info.
+ */
+void
+ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size 
record_length, Size num_records)
+{
+       static bool                                     first_time = true;
+
+       if (BufferCacheHibernationLevel == 0)
+       {
+               return;
+       }
+
+       if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY &&
+               id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS &&
+               id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+       {
+               return;
+       }
+
+       if (first_time)
+       {
+               /*
+                * AtProcExit_BufferCacheHibernation to be called at shutdown.
+                */
+               on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
+               first_time = false;
+       }
+
+       /*
+        * get the control file to check the system state and
+        * hibernation file validations.
+        */
+       if (controlFileInitialized == false)
+       {
+               if (GetControlFile(&controlFile) == true)
+               {
+                       controlFileInitialized = true;
+               }
+       }
+
+       BufferCacheHibernationData[id].data_ptr = ptr;
+       BufferCacheHibernationData[id].record_length = record_length;
+       BufferCacheHibernationData[id].num_records = num_records;
+}
+
+/*
+ * ResumeBufferCacheHibernation:
+ *             resume the buffer cache from hibernation file at startup.
+ */
+void
+ResumeBufferCacheHibernation(void)
+{
+       BufferHibernationFileType       id;
+       int                                                     i;
+       int                                                     fd;
+       Size                                            num_records;
+       Size                                            record_length;
+       char                                            *buf_common;
+       int                                                     oldNBuffers;
+       bool                                            buffer_block_processed;
+
+       if (BufferCacheHibernationLevel == 0)
+       {
+               return;
+       }
+
+       buf_common = NULL;
+       buffer_block_processed = false;
+
+       /*
+        * lock all buffer descriptors to prevent other processes from
+        * updating buffers.
+        */
+       for (i = 0; i < NBuffers; i++)
+       {
+               BufferDesc      *buf;
+
+               buf = &BufferDescriptors[i];
+               LockBufHdr(buf);
+       }
+
+       /*
+        * get the control file to check the system state and
+        * hibernation file validations.
+        */
+       if (controlFileInitialized == false)
+       {
+               elog(WARNING,
+                       "could not get control file, "
+                       "aborting buffer cache hibernation");
+               goto cleanup;
+       }
+
+       if (controlFile.state != DB_SHUTDOWNED)
+       {
+               elog(WARNING,
+                       "database system was not shut down normally, "
+                       "aborting buffer cache hibernation");
+               goto cleanup;
+       }
+
+       /*
+        * read the crc values which was computed when the hibernation
+        * files were created.
+        */
+       fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+                       O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+       if (fd < 0)
+       {
+               elog(WARNING,
+                       "could not open %s",
+                       BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+               goto cleanup;
+       }
+
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               pg_crc32        crc;
+
+               if (BufferCacheHibernationLevel < 2 &&
+                       id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       continue;
+               }
+
+               if (read(fd, (void *)&crc, sizeof(pg_crc32)) != 
sizeof(pg_crc32))
+               {
+                       if (BufferCacheHibernationLevel == 2 &&
+                               id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+                       {
+                               /*
+                                * if buffer_cache_hibernation_level changes 1 
to 2,
+                                * the crc value of buffer block hibernation 
file may not exist.
+                                * just ignore it here.
+                                */
+                               continue;
+                       }
+
+                       elog(WARNING,
+                               "could not read %s for %s",
+                               BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       close(fd);
+                       goto cleanup;
+               }
+               BufferCacheHibernationData[id].crc = crc;
+       }
+
+       close(fd);
+
+       /*
+        * allocate a buffer to read the contents of the hibernation files
+        * for validations.
+        */
+       record_length = 0;
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               if (record_length < 
BufferCacheHibernationData[id].record_length)
+               {
+                       record_length = 
BufferCacheHibernationData[id].record_length;
+               }
+       }
+
+       buf_common = malloc(record_length);
+       Assert(buf_common != NULL);
+
+       /* assume that the number of buffers have not changed. */
+       oldNBuffers = NBuffers;
+
+       /*
+        * check if all hibernation files are valid.
+        */
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               struct stat     sb;
+               pg_crc32        crc;
+
+               if (BufferCacheHibernationLevel < 2 &&
+                       id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       continue;
+               }
+
+               if (BufferCacheHibernationData[id].data_ptr == NULL ||
+                       BufferCacheHibernationData[id].record_length == 0 ||
+                       BufferCacheHibernationData[id].num_records == 0)
+               {
+                       elog(WARNING,
+                               "ResisterBufferCacheHibernation() was not 
called for %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       goto cleanup;
+               }
+
+               fd = 
BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+                               O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+               if (fd < 0)
+               {
+                       if (BufferCacheHibernationLevel == 2 &&
+                               id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+                       {
+                               /*
+                                * if buffer_cache_hibernation_level changes 1 
to 2,
+                                * the buffer block hibernation file may not 
exist.
+                                * just ignore it here.
+                                */
+                               continue;
+                       }
+
+                       goto cleanup;
+               }
+
+               if (fstat(fd, &sb) < 0)
+               {
+                       elog(WARNING,
+                               "could not get stats of the buffer cache 
hibernation file: %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       close(fd);
+                       goto cleanup;
+               }
+
+               record_length = BufferCacheHibernationData[id].record_length;
+               num_records = BufferCacheHibernationData[id].num_records;
+
+               if (sb.st_size != (record_length * num_records))
+               {
+                       /* The size of StrategyControl should be the same 
always. */
+                       if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
+                               (sb.st_size % record_length) > 0)
+                       {
+                               elog(WARNING,
+                                       "size mismatch on the buffer cache 
hibernation file: %s",
+                                       
BufferCacheHibernationData[id].hibernation_file);
+                               close(fd);
+                               goto cleanup;
+                       }
+
+                       /*
+                        * The number of records of buffer descriptors and 
blocks
+                        * should be the same.
+                        */
+                       if (oldNBuffers != NBuffers &&
+                               oldNBuffers != (sb.st_size / record_length))
+                       {
+                               elog(WARNING,
+                                       "size mismatch on the buffer cache 
hibernation file: %s",
+                                       
BufferCacheHibernationData[id].hibernation_file);
+                               close(fd);
+                               goto cleanup;
+                       }
+                       
+                       oldNBuffers = sb.st_size / record_length;
+
+                       elog(NOTICE,
+                               "shared_buffers have changed from %d to %d: %s",
+                               oldNBuffers, NBuffers,
+                               
BufferCacheHibernationData[id].hibernation_file);
+
+                       /* use the original size to compute CRC of the 
hibernation file. */
+                       num_records = oldNBuffers;
+               }
+
+               if ((pg_time_t)sb.st_mtime < controlFile.time)
+               {
+                       elog(WARNING,
+                               "the hibernation file is older than control 
file: %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       close(fd);
+                       goto cleanup;
+               }
+
+               INIT_CRC32(crc);
+               for (i = 0; i < num_records; i++)
+               {
+                       if (read(fd, (void *)buf_common, record_length) != 
record_length)
+                       {
+                               elog(WARNING,
+                                       "could not read the buffer cache 
hibernation file: %s",
+                                       
BufferCacheHibernationData[id].hibernation_file);
+                               close(fd);
+                               goto cleanup;
+                       }
+
+                       COMP_CRC32(crc, buf_common, record_length);
+
+                       /*
+                        * buffer descriptors validations.
+                        */
+                       if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
+                       {
+                               BufferDesc      *buf;
+                               BufFlags        abnormal_flags;
+
+                               if (i >= NBuffers)
+                               {
+                                       continue;
+                               }
+
+                               abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS 
| BM_IO_ERROR |
+                                                                 
BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
+
+                               buf = (BufferDesc *)buf_common;
+
+                               if (buf->flags & abnormal_flags)
+                               {
+                                       elog(WARNING,
+                                               "abnormal flags in buffer 
descriptors: %d",
+                                               buf->flags);
+                                       close(fd);
+                                       goto cleanup;
+                               }
+
+                               if (buf->usage_count > BM_MAX_USAGE_COUNT)
+                               {
+                                       elog(WARNING,
+                                               "invalid usage count in buffer 
descriptors: %d",
+                                               buf->usage_count);
+                                       close(fd);
+                                       goto cleanup;
+                               }
+
+                               if (buf->buf_id < 0 || buf->buf_id >= 
num_records)
+                               {
+                                       elog(WARNING,
+                                               "invalid buffer id in buffer 
descriptors: %d",
+                                               buf->buf_id);
+                                       close(fd);
+                                       goto cleanup;
+                               }
+                       }
+               }
+
+               FIN_CRC32(crc);
+               close(fd);
+
+               if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
+               {
+                       elog(WARNING,
+                               "crc mismatch on the buffer cache hibernation 
file: %s",
+                               
BufferCacheHibernationData[id].hibernation_file);
+                       close(fd);
+                       goto cleanup;
+               }
+       }
+
+       /*
+        * resume the buffer cache data structure from the hibernation files.
+        */
+       for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
id++)
+       {
+               int                     fd;
+               char            *ptr;
+
+               if (BufferCacheHibernationLevel < 2 &&
+                       id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       continue;
+               }
+
+               record_length = BufferCacheHibernationData[id].record_length;
+               num_records = BufferCacheHibernationData[id].num_records;
+
+               if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
+               {
+                       /* use the smaller number of buffers. */
+                       num_records = (oldNBuffers < NBuffers)? oldNBuffers : 
NBuffers;
+               }
+
+               fd = 
BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+                               O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+               if (fd < 0)
+               {
+                       if (BufferCacheHibernationLevel == 2 &&
+                               id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+                       {
+                               /*
+                                * if buffer_cache_hibernation_level changes 1 
to 2,
+                                * the buffer block hibernation file may not 
exist.
+                                * just ignore it here.
+                                */
+                               continue;
+                       }
+
+                       goto cleanup;
+               }
+
+               elog(NOTICE,
+                       "buffer cache resume from %s(%d bytes * %d records)",
+                       BufferCacheHibernationData[id].hibernation_file,
+                       record_length, num_records);
+
+               for (i = 0; i < num_records; i++)
+               {
+                       ptr = BufferCacheHibernationData[id].data_ptr + (i * 
record_length);
+                       read(fd, (void *)ptr, record_length);
+
+                       /* Re-lock the buffer descriptor if necessary. */
+                       if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
+                       {
+                               BufferDesc      *buf;
+
+                               buf = (BufferDesc *)ptr;
+                               if (IsUnlockBufHdr(buf))
+                               {
+                                       LockBufHdr(buf);
+                               }
+                       }
+               }
+
+               close(fd);
+
+               if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+               {
+                       buffer_block_processed = true;
+               }
+       }
+
+       if (buffer_block_processed == false)
+       {
+               /* we didn't use the buffer block hibernation file, so delete 
it now. */
+               id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
+               unlink(BufferCacheHibernationData[id].hibernation_file);
+       }
+
+       /*
+        * set the rest data structures (eg. lookup hashtable) up
+        * based on the buffer descriptors.
+        */
+       num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
+       for (i = 0; i < num_records; i++)
+       {
+               BufferDesc              *buf;
+               BufferTag               newTag;
+               uint32                  newHash;
+               int                             buf_id;
+
+               buf = &BufferDescriptors[i];
+               if (buf->tag.rnode.spcNode      == InvalidOid &&
+                       buf->tag.rnode.dbNode   == InvalidOid &&
+                       buf->tag.rnode.relNode  == InvalidOid)
+               {
+                       continue;
+               }
+
+               INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, 
buf->tag.blockNum);
+               newHash = BufTableHashCode(&newTag);
+
+               if (buffer_block_processed == false)
+               {
+                       Block                   bufBlock;
+                       SMgrRelation    smgr;
+
+                       /*
+                        * re-read buffer block.
+                        */
+                       bufBlock = BufHdrGetBlock(buf);
+                       smgr = smgropen(buf->tag.rnode, InvalidBackendId);
+                       smgrread(smgr, newTag.forkNum, newTag.blockNum, (char 
*) bufBlock);
+               }
+
+               buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+               if (buf_id != -1)
+               {
+                       /* the entry exists already, return it to the freelist. 
*/
+                       buf->refcount = 0;
+                       buf->flags = 0;
+                       InvalidateBuffer(buf);
+                       continue;
+               }
+
+               /* clear wait_backend_pid because the process was terminated 
already. */
+               buf->wait_backend_pid = 0;
+
+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+               elog(DEBUG5,
+                       "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+                               buf->buf_id, buf->flags, buf->usage_count, 
buf->refcount,
+                               buf->wait_backend_pid, buf->freeNext,
+                               newHash, newTag.rnode.spcNode,
+                               newTag.rnode.dbNode, newTag.rnode.relNode,
+                               newTag.forkNum, newTag.blockNum);
+#endif
+       }
+
+       /*
+        * adjust StrategyControl based on the change of shared_buffers.
+        */
+       if (oldNBuffers != NBuffers)
+       {
+               AdjustStrategyControl(oldNBuffers);
+       }
+
+       elog(NOTICE,
+               "buffer cache resumed successfully");
+
+cleanup:
+       for (i = 0; i < NBuffers; i++)
+       {
+               BufferDesc      *buf;
+
+               buf = &BufferDescriptors[i];
+               UnlockBufHdr(buf);
+       }
+
+       if (buf_common != NULL)
+       {
+               free(buf_common);
+       }
+
+       return;
+}
diff --git src/backend/storage/buffer/freelist.c 
src/backend/storage/buffer/freelist.c
index bf9903b..ffc101d 100644
--- src/backend/storage/buffer/freelist.c
+++ src/backend/storage/buffer/freelist.c
@@ -347,6 +347,12 @@ StrategyInitialize(bool init)
        }
        else
                Assert(!init);
+
+       if (BufferCacheHibernationLevel > 0)
+       {
+               
ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
+                       (char *)StrategyControl, sizeof(BufferStrategyControl), 
1);
+       }
 }
 
 
@@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, 
volatile BufferDesc *buf)
 
        return true;
 }
+
+/*
+ * AdjustStrategyControl -- adjust the member variables of StrategyControl
+ *
+ * If the shared_buffers setting had changed, restored StrategyControl
+ * needs to be adjusted for in both cases of shrinking and enlarging.
+ * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
+ */
+void
+AdjustStrategyControl(int oldNBuffers)
+{
+       if (oldNBuffers == NBuffers)
+       {
+               return;
+       }
+
+       /* enlarge or shrink the free buffer based on current NBuffers. */
+       StrategyControl->lastFreeBuffer = NBuffers - 1;
+
+       /* shared_buffers shrunk. */
+       if (oldNBuffers > NBuffers)
+       {
+               if (StrategyControl->nextVictimBuffer >= NBuffers)
+               {
+                       /* set the tail of buffers. */
+                       StrategyControl->nextVictimBuffer = NBuffers - 1;
+               }
+
+               if (StrategyControl->firstFreeBuffer >= NBuffers)
+               {
+                       /* set FREENEXT_END_OF_LIST(-1). */
+                       StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
+               }
+       }
+       else
+       /* shared_buffers enlarged. */
+       {
+               if (StrategyControl->firstFreeBuffer < 0)
+               {
+                       /* set the next entry of the tail of old buffers. */
+                       StrategyControl->firstFreeBuffer = oldNBuffers;
+               }
+       }
+}
diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
index 738e215..5affc6e 100644
--- src/backend/utils/misc/guc.c
+++ src/backend/utils/misc/guc.c
@@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
+                       gettext_noop("Sets buffer cache hibernation level."),
+                       gettext_noop("0 to disable(default), "
+                                                "1 for saving buffer 
descriptors only(recommended), "
+                                                "2 for saving buffer 
descriptors and buffer blocks(slower at shutdown).")
+               },
+               &BufferCacheHibernationLevel,
+               0, 0, 2,
+               NULL, NULL, NULL
+       },
+
        /* End-of-list marker */
        {
                {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git src/backend/utils/misc/postgresql.conf.sample 
src/backend/utils/misc/postgresql.conf.sample
index b8a1582..44b6ff3 100644
--- src/backend/utils/misc/postgresql.conf.sample
+++ src/backend/utils/misc/postgresql.conf.sample
@@ -119,6 +119,17 @@
 #maintenance_work_mem = 16MB           # min 1MB
 #max_stack_depth = 2MB                 # min 100kB
 
+
+# Buffer Cache Hibernation:
+#  Suspend/resume buffer cache data structure using hibernation files
+#  at shutdown/startup.
+#buffer_cache_hibernation_level = 0    # Sets buffer cache hibernation level.
+                                       # 0 to disable(default),
+                                       # 1 for saving buffer descriptors only
+                                       #   (recommended),
+                                       # 2 for saving buffer descriptors and
+                                       #   buffer blocks(slower at shutdown).
+
 # - Kernel Resource Usage -
 
 #max_files_per_process = 1000          # min 25
diff --git src/include/access/xlog.h src/include/access/xlog.h
index 7056fd6..7a9fb99 100644
--- src/include/access/xlog.h
+++ src/include/access/xlog.h
@@ -13,6 +13,7 @@
 
 #include "access/rmgr.h"
 #include "access/xlogdefs.h"
+#include "catalog/pg_control.h"
 #include "lib/stringinfo.h"
 #include "storage/buf.h"
 #include "utils/pg_crc.h"
@@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
 extern XLogRecPtr GetXLogReplayRecPtr(void);
 
+extern bool GetControlFile(ControlFileData *controlFile);
 extern void UpdateControlFile(void);
 extern uint64 GetSystemIdentifier(void);
 extern Size XLOGShmemSize(void);
diff --git src/include/storage/buf_internals.h 
src/include/storage/buf_internals.h
index b7d4ea5..d537ef1 100644
--- src/include/storage/buf_internals.h
+++ src/include/storage/buf_internals.h
@@ -167,6 +167,7 @@ typedef struct sbufdesc
  */
 #define LockBufHdr(bufHdr)             SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
 #define UnlockBufHdr(bufHdr)   SpinLockRelease(&(bufHdr)->buf_hdr_lock)
+#define IsUnlockBufHdr(bufHdr) SpinLockFree(&(bufHdr)->buf_hdr_lock)
 
 
 /* in buf_init.c */
@@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy 
strategy,
 extern int     StrategySyncStart(uint32 *complete_passes, uint32 
*num_buf_alloc);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern void AdjustStrategyControl(int oldNBuffers);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
index b8fc87e..ddfeb9d 100644
--- src/include/storage/bufmgr.h
+++ src/include/storage/bufmgr.h
@@ -211,6 +211,20 @@ extern void BgBufferSync(void);
 
 extern void AtProcExit_LocalBuffers(void);
 
+/* buffer cache hibernation support stuff */
+extern int     BufferCacheHibernationLevel;
+
+typedef enum BufferHibernationFileType
+{   
+    BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
+    BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
+    BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
+} BufferHibernationFileType;
+
+extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
+                               char *ptr, Size record_length, Size 
num_records);
+extern void ResumeBufferCacheHibernation(void);
+
 /* in freelist.c */
 extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
 extern void FreeAccessStrategy(BufferAccessStrategy strategy);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to