Hi,

I have implemented an initial concept of a 2nd-level cache. The idea is to
reserve some segments of shared memory for special buffers (e.g. indices) to
prevent them from being overwritten by other operations. I added this
functionality to the nbtree index scan.

I tested this by doing an index scan, a sequential read, dropping the system
buffers, and then doing another index scan. In a few places I saw performance
improvements, but I'm not sure whether this was just "random" or the intended
improvement.

There are a few places where the code could be optimized as well, and the patch
needs a lot of work, but could you take a look at it and give your opinions?

Regards,
Radek
diff --git a/.gitignore b/.gitignore
index 3f11f2e..6542e35 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ lcov.info
 /GNUmakefile
 /config.log
 /config.status
+/nbproject/private/
\ No newline at end of file
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 2796445..0229f5a 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -508,7 +508,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 	if (blkno != P_NEW)
 	{
 		/* Read an existing block of the relation */
-		buf = ReadBuffer(rel, blkno);
+		buf = ReadBufferLevel(rel, blkno, BUFFER_LEVEL_2ND);
 		LockBuffer(buf, access);
 		_bt_checkpage(rel, buf);
 	}
@@ -548,7 +548,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 			blkno = GetFreeIndexPage(rel);
 			if (blkno == InvalidBlockNumber)
 				break;
-			buf = ReadBuffer(rel, blkno);
+			buf = ReadBufferLevel(rel, blkno, BUFFER_LEVEL_2ND);
 			if (ConditionalLockBuffer(buf))
 			{
 				page = BufferGetPage(buf);
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index dadb49d..2922711 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -22,6 +22,7 @@ BufferDesc *BufferDescriptors;
 char	   *BufferBlocks;
 int32	   *PrivateRefCount;
 
+BufferLevelDesc *bufferLevels;
 
 /*
  * Data Structures:
@@ -72,8 +73,7 @@ int32	   *PrivateRefCount;
 void
 InitBufferPool(void)
 {
-	bool		foundBufs,
-				foundDescs;
+	bool		foundBufs, foundDescs, foundBufferLevels = false;
 
 	BufferDescriptors = (BufferDesc *)
 		ShmemInitStruct("Buffer Descriptors",
@@ -83,19 +83,38 @@ InitBufferPool(void)
 		ShmemInitStruct("Buffer Blocks",
 						NBuffers * (Size) BLCKSZ, &foundBufs);
 
-	if (foundDescs || foundBufs)
+        bufferLevels = (BufferLevelDesc*)
+                ShmemInitStruct("Buffer Levels Descriptors Table",
+						sizeof(BufferLevelDesc) * BUFFER_LEVEL_SIZE, 
+                                                &foundBufferLevels);
+	if (foundDescs || foundBufs || foundBufferLevels)
 	{
 		/* both should be present or neither */
-		Assert(foundDescs && foundBufs);
+		Assert(foundDescs && foundBufs && foundBufferLevels);
 		/* note: this path is only taken in EXEC_BACKEND case */
 	}
 	else
 	{
 		BufferDesc *buf;
+                BufferLevelDesc *bufferLevelDesc;
+                
 		int			i;
-
+                
 		buf = BufferDescriptors;
 
+                /* Initialize buffer levels. */
+                //1st Level - Default
+                bufferLevelDesc = bufferLevels;
+                bufferLevelDesc->index = 0;
+                bufferLevelDesc->super = BUFFER_LEVEL_END_OF_LIST;
+                bufferLevelDesc->lower = BUFFER_LEVEL_END_OF_LIST;
+                
+                //2nd Level - For indices
+                bufferLevelDesc++;
+                bufferLevelDesc->index = 1;
+                bufferLevelDesc->super = BUFFER_LEVEL_END_OF_LIST;
+                bufferLevelDesc->lower = 0;                
+                
 		/*
 		 * Initialize all the buffer headers.
 		 */
@@ -117,6 +136,10 @@ InitBufferPool(void)
 			 */
 			buf->freeNext = i + 1;
 
+                        /* Assign buffer level. */
+                        //TODO Currently hardcoded - 
+                        buf->buf_level = ( 0.3 * NBuffers > i ) ? BUFFER_LEVEL_DEFAULT : BUFFER_LEVEL_2ND;
+                        
 			buf->io_in_progress_lock = LWLockAssign();
 			buf->content_lock = LWLockAssign();
 		}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1f89e52..867bae0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -47,7 +47,8 @@
 #include "storage/standby.h"
 #include "utils/rel.h"
 #include "utils/resowner.h"
-
+#include "catalog/pg_type.h"
+#include "funcapi.h"
 
 /* Note: these two macros only work on shared buffers, not local ones! */
 #define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -85,7 +86,7 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence,
 				  ForkNumber forkNum, BlockNumber blockNum,
 				  ReadBufferMode mode, BufferAccessStrategy strategy,
-				  bool *hit);
+				  bool *hit, BufferLevel bufferLevel);
 static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
@@ -102,7 +103,8 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
 			ForkNumber forkNum,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
-			bool *foundPtr);
+			bool *foundPtr,
+                        BufferLevel bufferLevel);
 static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);
 
@@ -186,6 +188,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 	return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
 }
 
+Buffer 
+ReadBufferLevel(Relation reln, BlockNumber blockNum, BufferLevel bufferLevel) {
+    return ReadBufferExtendedLevel(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL, bufferLevel);
+}
+
+/** See {@link ReadBufferExtendedLevel} for more details. */
+Buffer
+ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
+				   ReadBufferMode mode, BufferAccessStrategy strategy) {
+    return ReadBufferExtendedLevel(reln, forkNum, blockNum, mode, strategy, BUFFER_LEVEL_DEFAULT);
+}
+
 /*
  * ReadBufferExtended -- returns a buffer containing the requested
  *		block of the requested relation.  If the blknum
@@ -219,8 +233,8 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
  * See buffer/README for details.
  */
 Buffer
-ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
-				   ReadBufferMode mode, BufferAccessStrategy strategy)
+ReadBufferExtendedLevel(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
+				   ReadBufferMode mode, BufferAccessStrategy strategy, BufferLevel bufferLevel)
 {
 	bool		hit;
 	Buffer		buf;
@@ -244,12 +258,19 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
 	 */
 	pgstat_count_buffer_read(reln);
 	buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
-							forkNum, blockNum, mode, strategy, &hit);
+							forkNum, blockNum, mode, strategy, &hit, bufferLevel);
 	if (hit)
 		pgstat_count_buffer_hit(reln);
 	return buf;
 }
 
+/** See {@link ReadBufferWithoutRelcacheLevel} for more details. */
+Buffer
+ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
+						  BlockNumber blockNum, ReadBufferMode mode,
+						  BufferAccessStrategy strategy) {
+    return ReadBufferWithoutRelcacheLevel(rnode, forkNum, blockNum, mode, strategy, BUFFER_LEVEL_DEFAULT);
+}
 
 /*
  * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require
@@ -261,16 +282,16 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
  * parameters.
  */
 Buffer
-ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
+ReadBufferWithoutRelcacheLevel(RelFileNode rnode, ForkNumber forkNum,
 						  BlockNumber blockNum, ReadBufferMode mode,
-						  BufferAccessStrategy strategy)
+						  BufferAccessStrategy strategy, BufferLevel bufferLevel)
 {
 	bool		hit;
 
 	SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
 
 	return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
-							 mode, strategy, &hit);
+							 mode, strategy, &hit, bufferLevel);
 }
 
 
@@ -282,7 +303,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 static Buffer
 ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 				  BlockNumber blockNum, ReadBufferMode mode,
-				  BufferAccessStrategy strategy, bool *hit)
+				  BufferAccessStrategy strategy, bool *hit, BufferLevel bufferLevel)
 {
 	volatile BufferDesc *bufHdr;
 	Block		bufBlock;
@@ -323,7 +344,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * not currently in memory.
 		 */
 		bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
-							 strategy, &found);
+							 strategy, &found, bufferLevel);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
 		else
@@ -507,7 +528,8 @@ static volatile BufferDesc *
 BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
-			bool *foundPtr)
+			bool *foundPtr,
+                        BufferLevel bufferLevel)
 {
 	BufferTag	newTag;			/* identity of requested block */
 	uint32		newHash;		/* hash value for newTag */
@@ -585,7 +607,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * still held, since it would be bad to hold the spinlock while
 		 * possibly waking up other processes.
 		 */
-		buf = StrategyGetBuffer(strategy, &lock_held);
+		buf = StrategyGetBufferLevel(strategy, &lock_held, bufferLevel);
 
 		Assert(buf->refcount == 0);
 
@@ -2772,3 +2794,69 @@ local_buffer_write_error_callback(void *arg)
 		pfree(path);
 	}
 }
+
+/*
+ * dump_shared_buffers_info -- set-returning function emitting one row
+ * (buf_idx, buf_level, buf_free_next) per shared buffer.
+ *
+ * The index of the next buffer to report is kept in funcctx->user_fctx.
+ * No lock is taken while reading the descriptors.
+ */
+Datum
+dump_shared_buffers_info(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	int		   *next_idx;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		TupleDesc	tupdesc;
+		MemoryContext oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/*
+		 * build tupdesc for result tuples (matches out parameters in pg_proc
+		 * entry)
+		 */
+		tupdesc = CreateTemplateTupleDesc(3, false);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "buf_idx",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "buf_level",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "buf_free_next",
+						   INT4OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		/* per-call state, allocated in the multi-call memory context */
+		next_idx = (int *) palloc(sizeof(int));
+		*next_idx = 0;
+		funcctx->user_fctx = (void *) next_idx;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	next_idx = (int *) funcctx->user_fctx;
+
+	if (*next_idx < NBuffers)
+	{
+		BufferDesc *bufferDesc = BufferDescriptors + *next_idx;
+		Datum		values[3];	/* one slot per tupdesc attribute */
+		bool		nulls[3];
+		HeapTuple	tuple;
+
+		values[0] = Int32GetDatum(*next_idx);
+		values[1] = Int32GetDatum(bufferDesc->buf_level);
+		values[2] = Int32GetDatum(bufferDesc->freeNext);
+		MemSet(nulls, 0, sizeof(nulls));
+
+		/* remember where to resume on the next call */
+		(*next_idx)++;
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+	/* all buffers have been reported */
+	SRF_RETURN_DONE(funcctx);
+}
\ No newline at end of file
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 72968db..fc27609 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -25,10 +25,14 @@
 typedef struct
 {
 	/* Clock sweep hand: index of next buffer to consider grabbing */
-	int			nextVictimBuffer;
+	int nextVictimBufferByLevel[BUFFER_LEVEL_SIZE];
 
-	int			firstFreeBuffer;	/* Head of list of unused buffers */
-	int			lastFreeBuffer; /* Tail of list of unused buffers */
+        /** Head of list of unused buffers */
+	int firstFreeBufferByLevel[BUFFER_LEVEL_SIZE];
+
+        //Currently unused
+        ///** Tail of list of unused buffers */
+	//int lastFreeBufferByLevel[BUFFER_LEVEL_SIZE];
 
 	/*
 	 * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
@@ -81,11 +85,16 @@ typedef struct BufferAccessStrategyData
 
 
 /* Prototypes for internal functions */
-static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
+static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy, BufferLevel level);
 static void AddBufferToRing(BufferAccessStrategy strategy,
 				volatile BufferDesc *buf);
 
+static int findFisrtBufferByLevel(BufferLevel level);
 
+volatile BufferDesc *
+StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) {
+    return StrategyGetBufferLevel(strategy, lock_held, BUFFER_LEVEL_DEFAULT);
+}
 /*
  * StrategyGetBuffer
  *
@@ -104,18 +113,21 @@ static void AddBufferToRing(BufferAccessStrategy strategy,
  *	kernel calls while holding the buffer header spinlock.
  */
 volatile BufferDesc *
-StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
+StrategyGetBufferLevel(BufferAccessStrategy strategy, bool *lock_held, BufferLevel level)
 {
-	volatile BufferDesc *buf;
+	volatile BufferDesc *buf = NULL;
 	int			trycounter;
-
+        int     bufferToExamine;
+        volatile BufferDesc *prevExaminedBuffer;
+        
+	//level = BUFFER_LEVEL_DEFAULT;
 	/*
 	 * If given a strategy object, see whether it can select a buffer. We
 	 * assume strategy objects don't need the BufFreelistLock.
 	 */
 	if (strategy != NULL)
 	{
-		buf = GetBufferFromRing(strategy);
+		buf = GetBufferFromRing(strategy, level);
 		if (buf != NULL)
 		{
 			*lock_held = false;
@@ -139,14 +151,33 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 	 * are considered to be protected by the BufFreelistLock not the
 	 * individual buffer spinlocks, so it's OK to manipulate them without
 	 * holding the spinlock.
+         * 
+         * We do not keep a lock on the previous buffer, as the free list is
+         * modified only by freelist.c and is locked exclusively (ensure this).
 	 */
-	while (StrategyControl->firstFreeBuffer >= 0)
+        prevExaminedBuffer = NULL;
+        bufferToExamine = StrategyControl->firstFreeBufferByLevel[level];
+	while (bufferToExamine >= 0)
 	{
-		buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
+		buf = &BufferDescriptors[bufferToExamine];
 		Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);
-
+                if (buf->buf_level != level) {
+                    prevExaminedBuffer = buf;
+                    bufferToExamine = buf->freeNext;
+                    //fprintf(stderr, "Skipping buffer %d due to level missmatch %d, next is %d\n", buf->buf_id, buf->buf_level, buf->freeNext);
+                    continue;
+                }else {
+                    //fprintf(stderr, "Selecting buffer %d with level %d\n", buf->buf_id, buf->buf_level);
+                }
+                
 		/* Unconditionally remove buffer from freelist */
-		StrategyControl->firstFreeBuffer = buf->freeNext;
+                /* Advance the cursor first: if this buffer proves unusable the
+                 * loop must move on to its successor, not re-examine it. */
+                bufferToExamine = buf->freeNext;
+                if (prevExaminedBuffer != NULL)
+                    prevExaminedBuffer->freeNext = bufferToExamine; /* unlink mid-list */
+                else
+                    StrategyControl->firstFreeBufferByLevel[level] = bufferToExamine; /* new head */
 		buf->freeNext = FREENEXT_NOT_IN_LIST;
 
 		/*
@@ -161,23 +192,50 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 		{
 			if (strategy != NULL)
 				AddBufferToRing(strategy, buf);
+                        if (buf->buf_level != BUFFER_LEVEL_DEFAULT) {
+                            //fprintf(stderr, "Returing free buffer %d with level %d\n", buf->buf_id, buf->buf_level);
+                        }
+                        Assert(buf->buf_level == level);
 			return buf;
 		}
 		UnlockBufHdr(buf);
 	}
-
+        if (buf != NULL && buf->buf_level != BUFFER_LEVEL_DEFAULT) {
+                //fprintf(stderr, "No free buffers found with level %d\n", buf->buf_level);
+        }
 	/* Nothing on the freelist, so run the "clock sweep" algorithm */
-	trycounter = NBuffers;
+	trycounter = NBuffers; //I know, we should only do NBuffersByLevel, but this is not initialized still
 	for (;;)
 	{
-		buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];
+		buf = &BufferDescriptors[StrategyControl->nextVictimBufferByLevel[level]];
 
-		if (++StrategyControl->nextVictimBuffer >= NBuffers)
+		if (++StrategyControl->nextVictimBufferByLevel[level] >= NBuffers)
 		{
-			StrategyControl->nextVictimBuffer = 0;
-			StrategyControl->completePasses++;
+                    /* We need to determine next victim buffer. We assume
+                     * for now, that we can sweep buffers by changing
+                     * buf_level. I don't know if such feature will be used
+                     * or not.
+                     */    
+                    StrategyControl->nextVictimBufferByLevel[level] = findFisrtBufferByLevel(level);
+                    
+                    Assert(StrategyControl->nextVictimBufferByLevel[level] >= 0);
+                    
+                    StrategyControl->completePasses++;
 		}
 
+                /* If buffer doesn't have required level move next. It is almost
+                 * same as code checking if buffer is pinned, but we don't use
+                 * locking here.
+                 */
+                if (buf->buf_level != level) {
+                    if (--trycounter == 0)
+                        elog(ERROR, "no unpinned buffers available with requested level");
+                    //fprintf(stderr, "Skipping in clock sweep buffer %d due to buffer level mismatch %d\n", buf->buf_id, buf->buf_level);
+                    continue;
+                }
+
+                Assert(buf->buf_level == level);
+                
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
 		 * it; decrement the usage_count (unless pinned) and keep scanning.
@@ -195,6 +253,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 				/* Found a usable buffer */
 				if (strategy != NULL)
 					AddBufferToRing(strategy, buf);
+                                if (buf->buf_level != BUFFER_LEVEL_DEFAULT) {
+                                        //fprintf(stderr, "Returing free buffer from clock sweep %d with level %d\n", buf->buf_id, buf->buf_level);
+                                }
+                                Assert(buf->buf_level == level);
 				return buf;
 			}
 		}
@@ -231,10 +293,10 @@ StrategyFreeBuffer(volatile BufferDesc *buf)
 	 */
 	if (buf->freeNext == FREENEXT_NOT_IN_LIST)
 	{
-		buf->freeNext = StrategyControl->firstFreeBuffer;
-		if (buf->freeNext < 0)
-			StrategyControl->lastFreeBuffer = buf->buf_id;
-		StrategyControl->firstFreeBuffer = buf->buf_id;
+		/* Push the buffer onto the head of its level's freelist.  The tail is
+		 * no longer tracked, so the head must be updated unconditionally. */
+		buf->freeNext = StrategyControl->firstFreeBufferByLevel[buf->buf_level];
+		StrategyControl->firstFreeBufferByLevel[buf->buf_level] = buf->buf_id;
 	}
 
 	LWLockRelease(BufFreelistLock);
@@ -257,7 +319,8 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
 	int			result;
 
 	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
-	result = StrategyControl->nextVictimBuffer;
+        //TODO We take the next victim buffer from the default level, as we have many levels
+	result = StrategyControl->nextVictimBufferByLevel[BUFFER_LEVEL_DEFAULT];
 	if (complete_passes)
 		*complete_passes = StrategyControl->completePasses;
 	if (num_buf_alloc)
@@ -326,6 +389,11 @@ StrategyInitialize(bool init)
 
 	if (!found)
 	{
+		int i;
+		BufferDesc *buf;
+		BufferLevel level;
+                int previousBufferLevel;
+                
 		/*
 		 * Only done once, usually in postmaster
 		 */
@@ -335,12 +403,38 @@ StrategyInitialize(bool init)
 		 * Grab the whole linked list of free buffers for our strategy. We
 		 * assume it was previously set up by InitBufferPool().
 		 */
-		StrategyControl->firstFreeBuffer = 0;
-		StrategyControl->lastFreeBuffer = NBuffers - 1;
-
-		/* Initialize the clock sweep pointer */
-		StrategyControl->nextVictimBuffer = 0;
+		//StrategyControl->firstFreeBuffer = 0;
+		//StrategyControl->lastFreeBuffer = NBuffers - 1;
+		//Devo mode:
+		// - we don't set lastFreeBuffer, as it is never used
+		for (i=0; i < BUFFER_LEVEL_SIZE; i++) {
+			StrategyControl->firstFreeBufferByLevel[i] = FREENEXT_END_OF_LIST;
+			//StrategyControl->lastFreeBufferByLevel[i] = FREENEXT_END_OF_LIST;
+		}
 
+		buf = BufferDescriptors;
+		for (i=0; i < NBuffers; i++, buf++) {
+			level = buf->buf_level;
+			if (StrategyControl->firstFreeBufferByLevel[level] == FREENEXT_END_OF_LIST) {
+				StrategyControl->firstFreeBufferByLevel[level] = i;
+			}
+		}
+                previousBufferLevel = NBuffers - 1;
+                
+                /* Coerce last free nexts. */
+                for (level = BUFFER_LEVEL_SIZE - 1; level > -1; level--) {
+                    if (StrategyControl->firstFreeBufferByLevel[level] != FREENEXT_END_OF_LIST) {
+                        BufferDescriptors[previousBufferLevel].freeNext = FREENEXT_END_OF_LIST;
+                        previousBufferLevel = StrategyControl->firstFreeBufferByLevel[level] - 1;
+
+                    }
+                    StrategyControl->nextVictimBufferByLevel[level] 
+                            = StrategyControl->firstFreeBufferByLevel[level];                
+                }
+
+                //Just simple check if everything is ok
+                Assert(previousBufferLevel == -1);
+                
 		/* Clear statistics */
 		StrategyControl->completePasses = 0;
 		StrategyControl->numBufferAllocs = 0;
@@ -431,7 +525,7 @@ FreeAccessStrategy(BufferAccessStrategy strategy)
  * The bufhdr spin lock is held on the returned buffer.
  */
 static volatile BufferDesc *
-GetBufferFromRing(BufferAccessStrategy strategy)
+GetBufferFromRing(BufferAccessStrategy strategy, BufferLevel level)
 {
 	volatile BufferDesc *buf;
 	Buffer		bufnum;
@@ -452,6 +546,8 @@ GetBufferFromRing(BufferAccessStrategy strategy)
 		return NULL;
 	}
 
+        // TODO: If the buffer doesn't have the requested level, tell the caller to allocate a new buffer. Is it ok?
+        
 	/*
 	 * If the buffer is pinned we cannot use it under any circumstances.
 	 *
@@ -521,3 +617,16 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
 
 	return true;
 }
+
+/* Index of the lowest-numbered buffer at the given level, or
+ * FREENEXT_END_OF_LIST when no buffer has that level. */
+static int
+findFisrtBufferByLevel(BufferLevel level)
+{
+    int bufIdx = 0;
+
+    while (bufIdx < NBuffers && BufferDescriptors[bufIdx].buf_level != level)
+        bufIdx++;
+
+    return (bufIdx < NBuffers) ? bufIdx : FREENEXT_END_OF_LIST;
+}
\ No newline at end of file
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index cff64ba..d634f84 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4320,7 +4320,8 @@ DATA(insert OID = 3113 (  last_value	PGNSP PGUID 12 1 0 0 f t f t f i 1 0 2283 "
 DESCR("fetch the last row value");
 DATA(insert OID = 3114 (  nth_value		PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ ));
 DESCR("fetch the Nth row value");
-
+DATA(insert OID = 3833 (  dump_shared_buffers_info	PGNSP PGUID 12 1 10 0 f f f t t s 0 0 2249 "" "{23,23,23}" "{o,o,o}" "{buf_idx,buf_level,buf_free_next}" _null_ dump_shared_buffers_info _null_ _null_ _null_ ));
+DESCR("Dump information about usage of shared buffers");
 
 /*
  * Symbolic values for provolatile column: these indicate whether the result
diff --git a/src/include/storage/buf.h b/src/include/storage/buf.h
index 5347ab0..01b51d2 100644
--- a/src/include/storage/buf.h
+++ b/src/include/storage/buf.h
@@ -43,4 +43,15 @@ typedef int Buffer;
  */
 typedef struct BufferAccessStrategyData *BufferAccessStrategy;
 
+
+/** Type for buffer level. */
+typedef int BufferLevel;
+
+#define BUFFER_LEVEL_END_OF_LIST -2
+#define BUFFER_LEVEL_DEFAULT 0
+#define BUFFER_LEVEL_2ND 1
+/** I wonder to mark end of list with zeroed struct, not to put all here. */
+#define BUFFER_LEVEL_SIZE 2
+
+extern int NBuffersByLevel[BUFFER_LEVEL_SIZE];
 #endif   /* BUF_H */
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0652bdf..1a019ac 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -104,6 +104,23 @@ typedef struct buftag
 #define BufMappingPartitionLock(hashcode) \
 	((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
 
+/** 
+ * Shared descriptor of buffer level. Each level contains index of super
+ * and lower level. {@code BUFFER_LEVEL_END_OF_LIST} in lower level means end of list,
+ * and {@code BUFFER_LEVEL_END_OF_LIST} in super means begin of list. This structure allows
+ * us to define tree-like structure of levels.
+ * 
+ * Note: Currently we use only 2 levels; 1st Level is standard level, 2nd
+ * level is designed for use with index pages (when requested).
+ */
+typedef struct sbuflevel {
+    BufferLevel index;
+    
+    /** Not used currently. */
+    BufferLevel super;
+    BufferLevel lower;
+} BufferLevelDesc;
+
 /*
  *	BufferDesc -- shared descriptor/state data for a single shared buffer.
  *
@@ -142,10 +159,28 @@ typedef struct sbufdesc
 	int			buf_id;			/* buffer's index number (from 0) */
 	int			freeNext;		/* link in freelist chain */
 
+        /** Holds level of buffer. */
+        BufferLevel             buf_level;
 	LWLockId	io_in_progress_lock;	/* to wait for I/O to complete */
 	LWLockId	content_lock;	/* to lock access to buffer contents */
 } BufferDesc;
 
+/** Struct for holding parameters for getting buffer. We emulate polymorphic 
+ * methods here*/
+typedef struct _StrategyGetBufferOptsV1 {    
+    int version;
+    BufferLevel level;
+}StrategyGetBufferOptsV1;
+
+typedef union _StrategyGetBufferUnion {
+    StrategyGetBufferOptsV1 optsV1;
+}StrategyGetBufferUnion;
+
+typedef struct _StrategyGetBufferOpts {
+    int version;
+    StrategyGetBufferOptsV1 opts;
+}StrategyGetBufferOpts;
+
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
 
 /*
@@ -174,14 +209,18 @@ extern PGDLLIMPORT BufferDesc *BufferDescriptors;
 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;
 
+/** Shared table for buffer levels */
+extern PGDLLIMPORT BufferLevelDesc *bufferLevels;
 
 /*
  * Internal routines: only called by bufmgr
  */
 
 /* freelist.c */
-extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
-				  bool *lock_held);
+extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held);
+
+extern volatile BufferDesc * StrategyGetBufferLevel(BufferAccessStrategy strategy, bool *lock_held, BufferLevel level);
+
 extern void StrategyFreeBuffer(volatile BufferDesc *buf);
 extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
 					 volatile BufferDesc *buf);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index b8fc87e..d8b1b46 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -59,6 +59,9 @@ extern PGDLLIMPORT int NLocBuffer;
 extern PGDLLIMPORT Block *LocalBufferBlockPointers;
 extern PGDLLIMPORT int32 *LocalRefCount;
 
+/** Dumps informations about shared buffers. */
+extern Datum dump_shared_buffers_info(PG_FUNCTION_ARGS);
+
 /* special block number for ReadBuffer() */
 #define P_NEW	InvalidBlockNumber		/* grow the file to get a new page */
 
@@ -157,12 +160,21 @@ extern PGDLLIMPORT int32 *LocalRefCount;
 extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
 			   BlockNumber blockNum);
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+extern Buffer ReadBufferLevel(Relation reln, BlockNumber blockNum, BufferLevel bufferLevel);
 extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 				   BlockNumber blockNum, ReadBufferMode mode,
 				   BufferAccessStrategy strategy);
+extern Buffer ReadBufferExtendedLevel(Relation reln, ForkNumber forkNum,
+				   BlockNumber blockNum, ReadBufferMode mode,
+				   BufferAccessStrategy strategy,
+                                   BufferLevel bufferLevel);
 extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
 						  ForkNumber forkNum, BlockNumber blockNum,
 						  ReadBufferMode mode, BufferAccessStrategy strategy);
+extern Buffer ReadBufferWithoutRelcacheLevel(RelFileNode rnode,
+						  ForkNumber forkNum, BlockNumber blockNum,
+						  ReadBufferMode mode, BufferAccessStrategy strategy,
+                                                  BufferLevel bufferLevel);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to