Matthew Poremba has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/56446 )
Change subject: mem-ruby: Remove DirectoryMemory storage in
MOESI_AMD_BASE-dir
......................................................................
mem-ruby: Remove DirectoryMemory storage in MOESI_AMD_BASE-dir
This protocol is using an old style where read/writes to memory were
being done by writing to a DataBlock in a DirectoryMemory entry. This
results in having multiple copies of memory, leads to stale copies in at
least one memory (usually DRAM), and requires --access-backing-store in
most cases to work properly. This changeset removes all references to
getDirectoryEntry(...).DataBlk and instead forwards those reads and
writes to DRAM always.
This results in new transient states BL_WM, BDW_WM, and B_WM which are
blocked states waiting on memory acks indicating a write request is
complete. The appropriate transitions are updated to move to these new
states and stall states are updated to include them. DMA write ACK is
also moved to when the request is sent to memory, rather than when the
request is received.
Change-Id: Ic5bd6a8a8881d7df782e0f7eed8be9d873610e04
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/56446
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
1 file changed, 122 insertions(+), 64 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 2496935..bcac2c1 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -61,26 +61,29 @@
{
// STATES
state_declaration(State, desc="Directory states",
default="Directory_State_U") {
- U, AccessPermission:Backing_Store, desc="unblocked";
- BL, AccessPermission:Busy, desc="got L3 WB request";
+ U, AccessPermission:Backing_Store, desc="unblocked";
// BL is Busy because it's possible for the data only to be in the
network
// in the WB, L3 has sent it and gone on with its business in possibly
I
// state.
- BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for memory";
- BS_M, AccessPermission:Backing_Store, desc="blocked
waiting for memory";
- BM_M, AccessPermission:Backing_Store, desc="blocked
waiting for memory";
- B_M, AccessPermission:Backing_Store, desc="blocked
waiting for memory";
- BP, AccessPermission:Backing_Store, desc="blocked
waiting for probes, no need for memory";
- BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for probes and memory";
- BS_PM, AccessPermission:Backing_Store, desc="blocked
waiting for probes and Memory";
- BM_PM, AccessPermission:Backing_Store, desc="blocked
waiting for probes and Memory";
- B_PM, AccessPermission:Backing_Store, desc="blocked
waiting for probes and Memory";
- BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked
waiting for probes, no need for memory";
- BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for probes, already got memory";
- BS_Pm, AccessPermission:Backing_Store, desc="blocked
waiting for probes, already got memory";
- BM_Pm, AccessPermission:Backing_Store, desc="blocked
waiting for probes, already got memory";
- B_Pm, AccessPermission:Backing_Store, desc="blocked
waiting for probes, already got memory";
- B, AccessPermission:Backing_Store, desc="sent
response, Blocked til ack";
+ BL, AccessPermission:Busy, desc="got L3 WB request";
+ BL_WM, AccessPermission:Busy, desc="writing L3 WB to
memory, waiting for ack";
+ BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for memory";
+ BS_M, AccessPermission:Backing_Store, desc="blocked waiting for
memory";
+ BM_M, AccessPermission:Backing_Store, desc="blocked waiting for
memory";
+ B_M, AccessPermission:Backing_Store, desc="blocked waiting for
memory";
+ BP, AccessPermission:Backing_Store, desc="blocked waiting for
probes, no need for memory";
+ BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for probes and memory";
+ BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for
probes and Memory";
+ BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for
probes and Memory";
+ B_PM, AccessPermission:Backing_Store, desc="blocked waiting for
probes and Memory";
+ BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked
waiting for probes, no need for memory";
+ BDW_WM, AccessPermission:Backing_Store, desc="DMA write, writing
to memory, waiting for ack";
+ BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked
waiting for probes, already got memory";
+ BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for
probes, already got memory";
+ BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for
probes, already got memory";
+ B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for
probes, already got memory";
+ B_WM, AccessPermission:Backing_Store, desc="writing to memory,
waiting for ack";
+ B, AccessPermission:Backing_Store, desc="sent response,
Blocked til ack";
}
// Events
@@ -132,7 +135,6 @@
// DirectoryEntry
structure(Entry, desc="...", interface="AbstractCacheEntry",
main="false") {
State DirectoryState, desc="Directory state";
- DataBlock DataBlk, desc="data for the block";
NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore";
}
@@ -195,16 +197,6 @@
return dir_entry;
}
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- TBE tbe := TBEs.lookup(addr);
- if (is_valid(tbe) && tbe.MemData) {
- DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
- return tbe.DataBlk;
- }
- DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr,
getDirectoryEntry(addr));
- return getDirectoryEntry(addr).DataBlk;
- }
-
State getState(TBE tbe, CacheEntry entry, Addr addr) {
return getDirectoryEntry(addr).DirectoryState;
}
@@ -379,7 +371,7 @@
trigger(Event:MemData, in_msg.addr, entry, tbe);
DPRINTF(RubySlicc, "%s\n", in_msg);
} else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
- trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks,
don't care about them.
+ trigger(Event:WBAck, in_msg.addr, entry, tbe);
} else {
DPRINTF(RubySlicc, "%s\n", in_msg.Type);
error("Invalid message");
@@ -920,7 +912,13 @@
action(d_writeDataToMemory, "d", desc="Write data to memory") {
peek(responseNetwork_in, ResponseMsg) {
- getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+ enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+ out_msg.addr := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ out_msg.DataBlk := in_msg.DataBlk;
+ }
if (tbe.Dirty == false) {
// have to update the TBE, too, because of how this
// directory deals with functional writes
@@ -966,7 +964,6 @@
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
- tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for
WBs
tbe.Dirty := false;
if (in_msg.Type == CoherenceRequestType:WriteThrough) {
tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
@@ -980,30 +977,37 @@
}
action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
- if (tbe.Dirty == false) {
- getDirectoryEntry(address).DataBlk := tbe.DataBlk;
- }
TBEs.deallocate(address);
unset_tbe();
}
action(wd_writeBackData, "wd", desc="Write back data if needed") {
- if (tbe.wtData) {
- getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk,
tbe.writeMask);
- } else if (tbe.atomicData) {
-
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
- getDirectoryEntry(address).DataBlk := tbe.DataBlk;
- } else if (tbe.Dirty == false) {
- getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
+ if (tbe.atomicData) {
+ tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
+ }
+ enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+ out_msg.addr := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Len := tbe.Len;
+ DPRINTF(ProtocolTrace, "%s\n", out_msg);
+ }
}
}
action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
peek(memQueue_in, MemoryMsg) {
if (tbe.wtData == true) {
- // do nothing
+ // Keep the write-through data based on mask, but use the memory
block
+ // for the masked-off data.
+ DataBlock tmpBlk := in_msg.DataBlk;
+ tmpBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
+ tbe.DataBlk := tmpBlk;
} else if (tbe.Dirty == false) {
- tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+ tbe.DataBlk := in_msg.DataBlk;
}
tbe.MemData := true;
}
@@ -1183,6 +1187,10 @@
stall_and_wait(dmaRequestQueue_in, address);
}
+ action(su_stallAndWaitRequest, "su", desc="Stall and wait on the
address") {
+ stall_and_wait(unblockNetwork_in, address);
+ }
+
action(wad_wakeUpDependents, "wad", desc="Wake up any requests waiting
for this address") {
wakeUpBuffers(address);
}
@@ -1199,18 +1207,18 @@
}
// TRANSITIONS
- transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM,
B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+ transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P,
BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {RdBlkS,
RdBlkM, RdBlk, CtoD}) {
st_stallAndWaitRequest;
}
// It may be possible to save multiple invalidations here!
- transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm,
B_Pm, B}, {Atomic, WriteThrough}) {
+ transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P,
BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {Atomic,
WriteThrough}) {
st_stallAndWaitRequest;
}
// The exit state is always going to be U, so wakeUpDependents logic
should be covered in all the
// transitions which are flowing into U.
- transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM,
B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {DmaRead,DmaWrite}){
+ transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P,
BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM},
{DmaRead,DmaWrite}){
sd_stallAndWaitRequest;
}
@@ -1233,7 +1241,6 @@
transition(U, DmaWrite, BDW_P) {L3TagArrayRead} {
atd_allocateTBEforDMA;
- da_sendResponseDmaAck;
icd_probeInvCoreDataForDMA;
pd_popDmaRequestQueue;
}
@@ -1293,12 +1300,10 @@
zz_recycleRequestQueue;
}
- transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+ transition(BL, CPUData, BL_WM) {L3TagArrayWrite, L3DataArrayWrite} {
d_writeDataToMemory;
al_allocateL3Block;
pr_profileL3HitMiss; //Must come after al_allocateL3Block and before
dt_deallocateTBE
- wad_wakeUpDependents;
- dt_deallocateTBE;
pr_popResponseQueue;
}
@@ -1308,7 +1313,13 @@
pr_popResponseQueue;
}
- transition({B, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM,
B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
+ transition(BL_WM, WBAck, U) {
+ wad_wakeUpDependents;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition({B, B_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM,
BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
z_stall;
}
@@ -1316,7 +1327,7 @@
pm_popMemQueue;
}
- transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM,
BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
+ transition({U, BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P,
BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM},
StaleVicDirty) {
rv_removeVicDirtyIgnore;
w_sendResponseWBAck;
p_popRequestQueue;
@@ -1376,7 +1387,7 @@
pm_popMemQueue;
}
- transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ transition(BS_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
s_sendResponseS;
wd_writeBackData;
@@ -1385,7 +1396,7 @@
pm_popMemQueue;
}
- transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ transition(BM_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
m_sendResponseM;
wd_writeBackData;
@@ -1394,7 +1405,7 @@
pm_popMemQueue;
}
- transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ transition(B_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
es_sendResponseES;
wd_writeBackData;
@@ -1403,7 +1414,7 @@
pm_popMemQueue;
}
- transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ transition(BS_M, L3Hit, B_WM) {L3TagArrayWrite, L3DataArrayWrite} {
s_sendResponseS;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1411,7 +1422,7 @@
ptl_popTriggerQueue;
}
- transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ transition(BM_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
m_sendResponseM;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1419,7 +1430,7 @@
ptl_popTriggerQueue;
}
- transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ transition(B_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
es_sendResponseES;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1453,12 +1464,18 @@
pt_popTriggerQueue;
}
- transition(BDW_P, ProbeAcksComplete, U) {
+ transition(BDW_P, ProbeAcksComplete, BDW_WM) {
+ wd_writeBackData;
+ da_sendResponseDmaAck;
+ pt_popTriggerQueue;
+ }
+
+ transition(BDW_WM, WBAck, U) {
// Check for pending requests from the core we put to sleep while
waiting
// for a response
wada_wakeUpAllDependentsAddr;
dt_deallocateTBE;
- pt_popTriggerQueue;
+ pm_popMemQueue;
}
transition(BDR_Pm, ProbeAcksComplete, U) {
@@ -1470,7 +1487,7 @@
pt_popTriggerQueue;
}
- transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite,
L3TagArrayWrite} {
+ transition(BS_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite,
L3TagArrayWrite} {
sf_setForwardReqTime;
s_sendResponseS;
wd_writeBackData;
@@ -1479,7 +1496,7 @@
pt_popTriggerQueue;
}
- transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite,
L3TagArrayWrite} {
+ transition(BM_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite,
L3TagArrayWrite} {
sf_setForwardReqTime;
m_sendResponseM;
wd_writeBackData;
@@ -1488,7 +1505,7 @@
pt_popTriggerQueue;
}
- transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite,
L3TagArrayWrite} {
+ transition(B_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite,
L3TagArrayWrite} {
sf_setForwardReqTime;
es_sendResponseES;
wd_writeBackData;
@@ -1497,7 +1514,7 @@
pt_popTriggerQueue;
}
- transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
+ transition(BP, ProbeAcksComplete, B_WM){L3TagArrayWrite,
L3TagArrayWrite} {
sf_setForwardReqTime;
c_sendResponseCtoD;
wd_writeBackData;
@@ -1505,4 +1522,17 @@
dt_deallocateTBE;
pt_popTriggerQueue;
}
+
+ transition(B_WM, WBAck, B) {
+ wada_wakeUpAllDependentsAddr;
+ pm_popMemQueue;
+ }
+
+ transition(B_WM, UnblockWriteThrough) {
+ z_stall;
+ }
+
+ transition(B_WM, CoreUnblock) {
+ su_stallAndWaitRequest;
+ }
}
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56446
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ic5bd6a8a8881d7df782e0f7eed8be9d873610e04
Gerrit-Change-Number: 56446
Gerrit-PatchSet: 3
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: Jason Lowe-Power <ja...@lowepower.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s