Hi everyone,
I am doing research on PCM using PTLsim. I am posting here because PTLsim
development has stopped and MARSS is based on PTLsim. I found that there is
a loop inside the function void MissBuffer::clock() in the file dcache.cpp,
as shown below:
//
// Advance every occupied miss buffer slot by one call of clock().
//
// Each call walks all SIZE slots; every non-idle slot has its own
// countdown (mb.cycles) decremented independently of the others, so
// several outstanding misses make progress within the same call.
//
// Per-slot state machine, as implemented below:
//   STATE_DELIVER_TO_L3 (only with ENABLE_L3_CACHE)
//     -> STATE_DELIVER_TO_L2 -> STATE_DELIVER_TO_L1 -> slot released
//
// When a countdown reaches zero the line is validated in the
// corresponding cache level, the countdown is reloaded for the next
// stage and the matching delivery statistic is incremented. On the
// final (L1) stage the waiting loads are woken through the LFRQ and/or
// the icache callback, and the slot is returned to the free map.
//
template <int SIZE>
void MissBuffer<SIZE>::clock() {
  // Fast path: every slot is marked free, so nothing is in flight.
  if likely (freemap.allset()) return;

  // Verbose per-slot tracing is gated on logable(6).
  bool DEBUG = logable(6);

  foreach (i, SIZE) {
    Entry& mb = missbufs[i];
    switch (mb.state) {
    case STATE_IDLE:
      // Unused slot: nothing to advance.
      break;
#ifdef ENABLE_L3_CACHE
    case STATE_DELIVER_TO_L3: {
      if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": deliver ", (void*)(Waddr)mb.addr, " to L3 (", mb.cycles, " cycles left) (iter ", iterations, ")", endl;
      mb.cycles--;
      if unlikely (!mb.cycles) {
        // Countdown expired: mark the line valid in L3 and start the
        // next stage with a fresh L3_LATENCY countdown.
        hierarchy.L3.validate(mb.addr);
        mb.cycles = L3_LATENCY;
        mb.state = STATE_DELIVER_TO_L2;
        stats.dcache.missbuf.deliver.mem_to_L3++;
      }
      break;
    }
#endif
    case STATE_DELIVER_TO_L2: {
      if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": deliver ", (void*)(Waddr)mb.addr, " to L2 (", mb.cycles, " cycles left) (iter ", iterations, ")", endl;
      mb.cycles--;
      if unlikely (!mb.cycles) {
        if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": delivered to L2 (map ", mb.lfrqmap, ")", endl;
        // Countdown expired: mark the line valid in L2 and start the
        // final stage with a fresh L2_LATENCY countdown.
        hierarchy.L2.validate(mb.addr);
        mb.cycles = L2_LATENCY;
        mb.state = STATE_DELIVER_TO_L1;
        // NOTE(review): this counter is named L3_to_L2 but is bumped even
        // when ENABLE_L3_CACHE is not defined - confirm that is intended.
        stats.dcache.missbuf.deliver.L3_to_L2++;
      }
      break;
    }
    case STATE_DELIVER_TO_L1: {
      if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": deliver ", (void*)(Waddr)mb.addr, " to L1 (", mb.cycles, " cycles left) (iter ", iterations, ")", endl;
      mb.cycles--;
      if unlikely (!mb.cycles) {
        if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": delivered to L1 switch (map ", mb.lfrqmap, ")", endl;

        // Data-side fill: validate the whole L1 line and wake the loads
        // recorded in this slot's LFRQ map.
        if likely (mb.dcache) {
          if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": delivered ", (void*)(Waddr)mb.addr, " to L1 dcache (map ", mb.lfrqmap, ")", endl;
          // If the L2 line size is bigger than the L1 line size, this will validate multiple lines in the L1 when an L2 line arrives:
          // foreach (i, L2_LINE_SIZE / L1_LINE_SIZE) L1.validate(mb.addr + i*L1_LINE_SIZE, bitvec<L1_LINE_SIZE>().setall());
          hierarchy.L1.validate(mb.addr, bitvec<L1_LINE_SIZE>().setall());
          stats.dcache.missbuf.deliver.L2_to_L1D++;
          hierarchy.lfrq.wakeup(mb.addr, mb.lfrqmap);
        }

        // Instruction-side fill: a slot may carry both flags, so this is
        // checked independently of the dcache branch above.
        if unlikely (mb.icache) {
          // Sometimes we can initiate an icache miss on an existing dcache line in the missbuf
          if (DEBUG) logfile << "[vcpu ", mb.threadid, "] mb", i, ": delivered ", (void*)(Waddr)mb.addr, " to L1 icache", endl;
          // If the L2 line size is bigger than the L1 line size, this will validate multiple lines in the L1 when an L2 line arrives:
          // foreach (i, L2_LINE_SIZE / L1I_LINE_SIZE) L1I.validate(mb.addr + i*L1I_LINE_SIZE, bitvec<L1I_LINE_SIZE>().setall());
          hierarchy.L1I.validate(mb.addr, bitvec<L1I_LINE_SIZE>().setall());
          stats.dcache.missbuf.deliver.L2_to_L1I++;
          LoadStoreInfo lsi = 0;
          lsi.rob = mb.rob;
          lsi.threadid = mb.threadid;
          // The callback is optional; notify it only when one is installed.
          if likely (hierarchy.callback) hierarchy.callback->icache_wakeup(lsi, mb.addr);
        }

        // Release the slot: it must currently be marked in use.
        assert(!freemap[i]);
        freemap[i] = 1;
        mb.reset();
        count--;
        assert(count >= 0);
      }
      break;
    }
    }
  }
}
This implies that accesses to memory or the caches happen in parallel. But how
can that happen with only a single memory rank? In addition, the Level-1
D-cache has only 8 banks, while the size of MissBuffer is 64, so the banks
cannot explain the parallel access. I am confused. If I want to add some
functionality, such as recording the memory-write time, how can I do it?
Thanks!
Huang Lianjun
2012-09-30
hljhnu
_______________________________________________
http://www.marss86.org
Marss86-Devel mailing list
[email protected]
https://www.cs.binghamton.edu/mailman/listinfo/marss86-devel