Hello, Please CC: replies to me, since I am not subscribed. >> http://citeseer.nj.nec.com/63909.html > The "resistance to scanning" seemed interesting, maybe one-time > activities like a "find" run or big cat/dd will have less impact with > this. Exactly. But not only that. I have made some experimental tests. When you have totally random access to data, you get degradation to LRU performance (but no worse). However, when doing normal work, results are quite encouraging. Speedup percentage is computed assuming processing in-memory buffer takes x and loading on-disk buffer takes 100x time: #blocks buffers P hitLRU hit2Q 2Q+ speedup 262400 16384 8 19.29% 24.89% 29% 7.365% 262400 16384 4 11.56% 14.23% 23% 3.084% 1024K 16384 32 16.90% 22.82% 35% 7.573% 1024K 16384 16 9.00% 11.94% 33% 3.305% 1024K 16384 8 4.94% 6.38% 29% 1.515% 4096K 16384 64 8.40% 11.39% 36% 3.339% 4096K 16384 32 4.32% 5.79% 34% 1.536% I used reasonable figures for filesystem size (1G, 4G and 16G) and buffer cache (64MB), with blocksize 4K. All simulations used 10% for A1in and 25% for A1out queues. Almost all simulations show over 30% better hit rate as LRU, translating to ~3% real time speedup for normal workload. Speedup for scanning type workload (find, cat large file, etc.) should be even better - write access pattern generator and try it :-) Constant P determines "randomness" of accesses as follows: int x = random() % (NR * LEVEL); for (int i = LEVEL - 1; i > 0; --i) if (x >= NR * i) x = (x - NR * i) / (i + 1); where x is generated block # to be accessed, LEVEL is P and NR is # of disk blocks. I attach a crude program to simulate LRU, 2Q and FIFO buffer management policies. If you want, you can play with parameters a little bit and simulate other workloads (e.g., record what's going on in the system and then simulate real system block accesses). I would like to hear from you if you have some interesting results. -- Ivan Schreter [EMAIL PROTECTED]
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <time.h> #include <signal.h> int ops = 0, hit_lru = 0, hit_2q = 0, hit_fifo = 0; #define BUFS 16384 // # of buffer entries in memory #define NR 262400 // # of DB buffers #define LEVEL 8 // max. test level (1=total random) //#define USE_FASTAGE #define BUFSA1IN (BUFS/10) #define BUFSA1OUT (BUFS/4) typedef struct { int next; int prev; int id; int age; } lru_entry; lru_entry lru[BUFS]; int lru_idx[NR]; int lru_free = 0, lru_head = 0, lru_tail = 0; void lru_page(int x) { if (lru_idx[x] >= 0) { hit_lru++; // move to beginning of the buffer x = lru_idx[x]; if (x == lru_head) return; lru[lru[x].prev].next = lru[x].next; if (x == lru_tail) { lru_tail = lru[x].prev; } else { lru[lru[x].next].prev = lru[x].prev; } lru[x].prev = -1; lru[x].next = lru_head; lru[lru_head].prev = x; lru_head = x; return; } if (lru_free == -1) { // remove one from tail lru[lru_tail].next = lru_free; lru[lru_tail = lru[lru_free = lru_tail].prev].next = -1; lru_idx[lru[lru_free].id] = -1; } // add to head int r = lru_free; lru_free = lru[r].next; lru[r].prev = -1; lru[r].next = lru_head; lru[r].id = x; lru[lru_head].prev = r; if (lru_head < 0) lru_tail = r; lru_head = r; lru_idx[x] = r; } lru_entry r2q[BUFS]; int r2q_a1out[BUFSA1OUT]; int r2q_idx[NR]; int r2q_free = 0, r2q_head = -1, r2q_tail = -1; int r2q_a1i_head = -1, r2q_a1i_tail = -1, r2q_a1i_cnt = 0; int r2q_a1o_head = 0, r2q_a1o_tail = 0, r2q_age = 0; #define R2Q_MASK 0xffffff #define R2Q_FLG_MASK 0xff000000 #define R2Q_A1I_FLG 0x1000000 #define R2Q_A1O_FLG 0x2000000 #define R2Q_AM_FLG 0x4000000 void r2q_reclaim() { // free one page if (r2q_a1i_cnt > BUFSA1IN) { // remove last page from A1in and put to A1out int r = r2q_a1i_tail; #if 0 if (r < 0) { printf("Reclaim err: %d - %d\n", r2q_a1i_tail, r2q_a1i_head); raise(SIGSEGV); } #endif r2q_a1i_tail = r2q[r].prev; r2q[r2q_a1i_tail].next = -1; --r2q_a1i_cnt; //printf("Del2 %d, cnt = %d\n", r, r2q_a1i_cnt); int x = r2q[r].id; r2q[r].next = r2q_free; r2q_free = r; r2q_idx[x] = R2Q_MASK | R2Q_A1O_FLG; //if (x >= NR || x < 0) printf("set at %d = %d (@%d), head %d\n", // r2q_a1o_tail, x, r, r2q_a1o_head); r2q_a1out[r2q_a1o_tail++] = x; if (r2q_a1o_tail == BUFSA1OUT) r2q_a1o_tail = 0; if (r2q_a1o_tail == r2q_a1o_head) { // overflow, remove from buf int d = r2q_a1out[r2q_a1o_head++]; if (r2q_a1o_head == BUFSA1OUT) r2q_a1o_head = 0; // unmark in index r2q_idx[d] &= ~R2Q_A1O_FLG; } return; } // remove from tail of Am, do not put in A1out (it was least recently // used, so no point in keeping it) int x = r2q[r2q_tail].id; int r = r2q_tail; r2q_idx[x] = R2Q_MASK; r2q_tail = r2q[r].prev; r2q[r2q_tail].next = -1; r2q[r].next = r2q_free; r2q_free = r; } void r2q_page(int x) { int r = r2q_idx[x] & R2Q_MASK, rm = r2q_idx[x] & R2Q_FLG_MASK; if (rm & R2Q_AM_FLG) { // already in buffer hit_2q++; // in LRU buffer, move to first if (r == r2q_head) return; r2q[r2q[r].prev].next = r2q[r].next; if (r == r2q_tail) { r2q_tail = r2q[r].prev; } else { r2q[r2q[r].next].prev = r2q[r].prev; } r2q[r].prev = -1; r2q[r].next = r2q_head; r2q[r2q_head].prev = r; r2q_head = r; return; } else if (rm & R2Q_A1O_FLG) { // was in A1out, put to head of Am if (rm & R2Q_A1I_FLG) { // remove from A1in (already in memory) hit_2q++; --r2q_a1i_cnt; if (r == r2q_a1i_head) { r2q_a1i_head = r2q[r].next; if (r2q_a1i_cnt) r2q[r2q_a1i_head].prev = -1; } else if (r == r2q_a1i_tail) { r2q_a1i_tail = r2q[r].prev; r2q[r2q_a1i_tail].next = -1; } else { // in the middle r2q[r2q[r].next].prev = r2q[r].prev; r2q[r2q[r].prev].next = r2q[r].next; } //printf("Del %d, cnt = %d\n", r, r2q_a1i_cnt); } else { // not yet in memory if (r2q_free < 0) r2q_reclaim(); r = r2q_free; r2q_free = r2q[r].next; r2q[r].id = x; r2q_idx[x] = r; } // add to head of Am r2q[r].prev = -1; r2q[r].next = r2q_head; if (r2q_head < 0) r2q_tail = r; else r2q[r2q_head].prev = r; r2q_head = r; // must check out flag, may be changed by reclaim r2q_idx[x] = r | R2Q_AM_FLG | (r2q_idx[x] & R2Q_A1O_FLG); return; } else if (rm & R2Q_A1I_FLG) { // do nothing - is in memory in A1in queue hit_2q++; #ifdef USE_FASTAGE #warning Using FastAge // mark into R2Q_A1O when in last 25% or so... int age = r2q_age - r2q[r].age; // age cannot be significantly more than r2q_cnt, normally is // less if ((r2q_a1i_cnt - age) < (r2q_a1i_cnt << 2)) { // is in last 25% of its life, put to out pages r2q_idx[x] |= R2Q_A1O_FLG; r2q_a1out[r2q_a1o_tail++] = x; if (r2q_a1o_tail == BUFSA1OUT) r2q_a1o_tail = 0; if (r2q_a1o_tail == r2q_a1o_head) { // overflow, remove from buf int d = r2q_a1out[r2q_a1o_head++]; if (r2q_a1o_head == BUFSA1OUT) r2q_a1o_head = 0; // unmark in index r2q_idx[d] &= ~R2Q_A1O_FLG; } } #endif return; } else { // is in no queue, load & add to head of A1in if (r2q_free < 0) r2q_reclaim(); r = r2q_free; r2q_free = r2q[r].next; r2q[r].id = x; r2q[r].age = r2q_age++; r2q[r].next = r2q_a1i_head; r2q[r].prev = -1; if (!r2q_a1i_cnt) { // first item, set also tail r2q_a1i_tail = r; } else { r2q[r2q_a1i_head].prev = r; } r2q_a1i_cnt++; r2q_a1i_head = r; //printf("Add %d, cnt = %d\n", r, r2q_a1i_cnt); //if (r == 0 && r2q_a1i_cnt > 1) raise(SIGSTOP); r2q_idx[x] = r | R2Q_A1I_FLG; } } int fifo_idx[NR]; int fifo[BUFS]; int fifo_head = 0, fifo_tail = 0; void fifo_page(int x) { if (fifo_idx[x] >= 0) { hit_fifo++; return; } fifo_idx[x] = fifo_head; fifo[fifo_head++] = x; if (fifo_head == BUFS) fifo_head = 0; if (fifo_head == fifo_tail) { fifo_idx[fifo[fifo_tail++]] = -1; if (fifo_tail == BUFS) fifo_tail = 0; } } int main() { memset(lru_idx, -1, sizeof(lru_idx)); memset(fifo_idx, -1, sizeof(fifo_idx)); for (int i = 0; i < NR; ++i) r2q_idx[i] = R2Q_MASK; for (int i = 0; i < BUFS - 1; ++i) { lru[i].next = i + 1; r2q[i].next = i + 1; } lru[BUFS-1].next = -1; r2q[BUFS-1].next = -1; time_t tm = time(NULL); for (;;) { int x = random() % (NR * LEVEL); for (int i = LEVEL - 1; i > 0; --i) if (x >= NR * i) x = (x - NR * i) / (i + 1); lru_page(x); r2q_page(x); fifo_page(x); ops++; time_t cur_tm = time(NULL); if (cur_tm != tm) { int lru_ptime = (ops + 99 * (ops - hit_lru)) / 1000; int r2q_ptime = (ops + 99 * (ops - hit_2q)) / 1000; printf("Ops: %d, LRU: %d %.2f%%, 2Q: %d %.2f%%, " "FIFO: %d %.2f%%, 2Q/LRU %.2f\n" "LRU ptime: %d, 2Q ptime: %d, " "ratio 2Q/LRU: %.3f%% +%.3f%%\n", ops / 1000, hit_lru / 1000, hit_lru * 100.0 / ops, hit_2q / 1000, hit_2q * 100.0 / ops, hit_fifo / 1000, hit_fifo * 100.0 / ops, hit_2q * 1.0 / hit_lru, lru_ptime, r2q_ptime, r2q_ptime * 100.0 / lru_ptime, lru_ptime * 100.0 / r2q_ptime - 100.0); tm = cur_tm; } } return 0; }