Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/56446 )

Change subject: mem-ruby: Remove DirectoryMemory storage in MOESI_AMD_BASE-dir
......................................................................

mem-ruby: Remove DirectoryMemory storage in MOESI_AMD_BASE-dir

This protocol uses an old style where reads and writes to memory were
done by writing to a DataBlock in a DirectoryMemory entry. This results
in multiple copies of memory, leads to stale data in at least one copy
(usually DRAM), and requires --access-backing-store in most cases to
work properly. This changeset removes all references to
getDirectoryEntry(...).DataBlk and instead always forwards those reads
and writes to DRAM.
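
For illustration only, a minimal C++ sketch (not gem5 or SLICC code; DataBlock,
OldDirectoryEntry, and MemoryController are hypothetical stand-ins) contrasting
a directory that keeps its own copy of the data with one that forwards the
write to memory and waits for an ack:

#include <array>
#include <cstdint>
#include <iostream>

struct DataBlock { std::array<uint8_t, 64> bytes{}; };

// Old style: the directory keeps its own DataBlk copy, so the copy in DRAM
// can go stale (hence the --access-backing-store workaround mentioned above).
struct OldDirectoryEntry {
    DataBlock DataBlk;                               // duplicate copy of the line
    void write(const DataBlock &d) { DataBlk = d; }  // DRAM never sees this write
};

// New style: the directory holds no data; the write is forwarded to the
// memory controller, which owns the only copy and returns an ack.
struct MemoryController {
    DataBlock dram;                                  // single authoritative copy
    bool writeBack(const DataBlock &d) { dram = d; return true; }  // ack
};

int main() {
    DataBlock line;
    line.bytes[0] = 0xAB;

    OldDirectoryEntry oldDir;
    oldDir.write(line);                 // only the directory entry changed

    MemoryController mem;
    bool acked = mem.writeBack(line);   // DRAM updated, ack returned
    std::cout << "write acked: " << std::boolalpha << acked << "\n";
    return 0;
}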

This results in new transient states BL_WM, BDW_WM, and B_WM, which are
blocked states that wait for the memory ack indicating a write request
has completed. The appropriate transitions are updated to move to these
new states, and the stalling transitions are updated to include them.
The DMA write ack is also now sent when the write is issued to memory,
rather than when the DMA request is received.
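
Again for illustration only, a minimal C++ sketch (hypothetical types, reduced
to the L3 writeback case) of the new blocked-waiting-for-memory-ack flow,
roughly BL -> BL_WM on CPUData and BL_WM -> U on WBAck:

#include <cassert>
#include <iostream>

enum class State { U, BL, BL_WM };
enum class Event { L3WBRequest, CPUData, WBAck };

struct Directory {
    State state = State::U;

    void handle(Event e) {
        switch (state) {
        case State::U:
            if (e == Event::L3WBRequest) state = State::BL;  // got L3 WB request
            break;
        case State::BL:
            if (e == Event::CPUData) {
                // Forward the data to memory (MEMORY_WB) instead of storing it
                // in a directory DataBlk, then block until the ack arrives.
                state = State::BL_WM;
            }
            break;
        case State::BL_WM:
            if (e == Event::WBAck) {
                // Memory write complete: wake dependents, free the TBE, unblock.
                state = State::U;
            }
            break;
        }
    }
};

int main() {
    Directory dir;
    dir.handle(Event::L3WBRequest);   // U -> BL
    dir.handle(Event::CPUData);       // BL -> BL_WM (writeback sent to memory)
    assert(dir.state == State::BL_WM);
    dir.handle(Event::WBAck);         // BL_WM -> U (ack received)
    assert(dir.state == State::U);
    std::cout << "L3 writeback completes only after the memory ack\n";
    return 0;
}

The same pattern applies to the DMA write path (BDW_P -> BDW_WM) and to B_WM,
with the DMA ack deferred until the write has actually been handed to memory.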

Change-Id: Ic5bd6a8a8881d7df782e0f7eed8be9d873610e04
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/56446
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
1 file changed, 122 insertions(+), 64 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 2496935..bcac2c1 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -61,26 +61,29 @@
 {
   // STATES
   state_declaration(State, desc="Directory states", default="Directory_State_U") {
-    U, AccessPermission:Backing_Store,                 desc="unblocked";
-    BL, AccessPermission:Busy,                  desc="got L3 WB request";
+    U, AccessPermission:Backing_Store,          desc="unblocked";
     // BL is Busy because it's possible for the data only to be in the network
     // in the WB, L3 has sent it and gone on with its business in possibly I
     // state.
-    BDR_M, AccessPermission:Backing_Store,      desc="DMA read, blocked waiting for memory";
-    BS_M, AccessPermission:Backing_Store,       desc="blocked waiting for memory";
-    BM_M, AccessPermission:Backing_Store,       desc="blocked waiting for memory";
-    B_M, AccessPermission:Backing_Store,        desc="blocked waiting for memory";
-    BP, AccessPermission:Backing_Store,         desc="blocked waiting for probes, no need for memory";
-    BDR_PM, AccessPermission:Backing_Store,     desc="DMA read, blocked waiting for probes and memory";
-    BS_PM, AccessPermission:Backing_Store,      desc="blocked waiting for probes and Memory";
-    BM_PM, AccessPermission:Backing_Store,      desc="blocked waiting for probes and Memory";
-    B_PM, AccessPermission:Backing_Store,       desc="blocked waiting for probes and Memory";
-    BDW_P, AccessPermission:Backing_Store,      desc="DMA write, blocked waiting for probes, no need for memory";
-    BDR_Pm, AccessPermission:Backing_Store,     desc="DMA read, blocked waiting for probes, already got memory";
-    BS_Pm, AccessPermission:Backing_Store,      desc="blocked waiting for probes, already got memory";
-    BM_Pm, AccessPermission:Backing_Store,      desc="blocked waiting for probes, already got memory";
-    B_Pm, AccessPermission:Backing_Store,       desc="blocked waiting for probes, already got memory";
-    B, AccessPermission:Backing_Store,          desc="sent response, Blocked til ack";
+    BL, AccessPermission:Busy,                  desc="got L3 WB request";
+    BL_WM, AccessPermission:Busy,               desc="writing L3 WB to memory, waiting for ack";
+    BDR_M, AccessPermission:Backing_Store,      desc="DMA read, blocked waiting for memory";
+    BS_M, AccessPermission:Backing_Store,       desc="blocked waiting for memory";
+    BM_M, AccessPermission:Backing_Store,       desc="blocked waiting for memory";
+    B_M, AccessPermission:Backing_Store,        desc="blocked waiting for memory";
+    BP, AccessPermission:Backing_Store,         desc="blocked waiting for probes, no need for memory";
+    BDR_PM, AccessPermission:Backing_Store,     desc="DMA read, blocked waiting for probes and memory";
+    BS_PM, AccessPermission:Backing_Store,      desc="blocked waiting for probes and Memory";
+    BM_PM, AccessPermission:Backing_Store,      desc="blocked waiting for probes and Memory";
+    B_PM, AccessPermission:Backing_Store,       desc="blocked waiting for probes and Memory";
+    BDW_P, AccessPermission:Backing_Store,      desc="DMA write, blocked waiting for probes, no need for memory";
+    BDW_WM, AccessPermission:Backing_Store,     desc="DMA write, writing to memory, waiting for ack";
+    BDR_Pm, AccessPermission:Backing_Store,     desc="DMA read, blocked waiting for probes, already got memory";
+    BS_Pm, AccessPermission:Backing_Store,      desc="blocked waiting for probes, already got memory";
+    BM_Pm, AccessPermission:Backing_Store,      desc="blocked waiting for probes, already got memory";
+    B_Pm, AccessPermission:Backing_Store,       desc="blocked waiting for probes, already got memory";
+    B_WM, AccessPermission:Backing_Store,       desc="writing to memory, waiting for ack";
+    B, AccessPermission:Backing_Store,          desc="sent response, Blocked til ack";
   }

   // Events
@@ -132,7 +135,6 @@
   // DirectoryEntry
structure(Entry, desc="...", interface="AbstractCacheEntry", main="false") {
     State DirectoryState,          desc="Directory state";
-    DataBlock DataBlk,             desc="data for the block";
     NetDest VicDirtyIgnore,  desc="VicDirty coming from whom to ignore";
   }

@@ -195,16 +197,6 @@
     return dir_entry;
   }

-  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
-    TBE tbe := TBEs.lookup(addr);
-    if (is_valid(tbe) && tbe.MemData) {
-      DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
-      return tbe.DataBlk;
-    }
-    DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
-    return getDirectoryEntry(addr).DataBlk;
-  }
-
   State getState(TBE tbe, CacheEntry entry, Addr addr) {
     return getDirectoryEntry(addr).DirectoryState;
   }
@@ -379,7 +371,7 @@
           trigger(Event:MemData, in_msg.addr, entry, tbe);
           DPRINTF(RubySlicc, "%s\n", in_msg);
         } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
-          trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+          trigger(Event:WBAck, in_msg.addr, entry, tbe);
         } else {
           DPRINTF(RubySlicc, "%s\n", in_msg.Type);
           error("Invalid message");
@@ -920,7 +912,13 @@

   action(d_writeDataToMemory, "d", desc="Write data to memory") {
     peek(responseNetwork_in, ResponseMsg) {
-      getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+      enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+        out_msg.addr := address;
+        out_msg.Type := MemoryRequestType:MEMORY_WB;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        out_msg.DataBlk := in_msg.DataBlk;
+      }
       if (tbe.Dirty == false) {
           // have to update the TBE, too, because of how this
           // directory deals with functional writes
@@ -966,7 +964,6 @@
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
-      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
       tbe.Dirty := false;
       if (in_msg.Type == CoherenceRequestType:WriteThrough) {
         tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
@@ -980,30 +977,37 @@
   }

   action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
-    if (tbe.Dirty == false) {
-        getDirectoryEntry(address).DataBlk := tbe.DataBlk;
-    }
     TBEs.deallocate(address);
     unset_tbe();
   }

   action(wd_writeBackData, "wd", desc="Write back data if needed") {
-    if (tbe.wtData) {
-      getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
-    } else if (tbe.atomicData) {
-      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
-      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
-    } else if (tbe.Dirty == false) {
-      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
+      if (tbe.atomicData) {
+        tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
+      }
+      enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+        out_msg.addr := address;
+        out_msg.Type := MemoryRequestType:MEMORY_WB;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Len := tbe.Len;
+        DPRINTF(ProtocolTrace, "%s\n", out_msg);
+      }
     }
   }

   action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
     peek(memQueue_in, MemoryMsg) {
       if (tbe.wtData == true) {
-          // do nothing
+        // Keep the write-through data based on mask, but use the memory block
+        // for the masked-off data.
+        DataBlock tmpBlk := in_msg.DataBlk;
+        tmpBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
+        tbe.DataBlk := tmpBlk;
       } else if (tbe.Dirty == false) {
-        tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+        tbe.DataBlk := in_msg.DataBlk;
       }
       tbe.MemData := true;
     }
@@ -1183,6 +1187,10 @@
     stall_and_wait(dmaRequestQueue_in, address);
   }

+  action(su_stallAndWaitRequest, "su", desc="Stall and wait on the address") {
+    stall_and_wait(unblockNetwork_in, address);
+  }
+
action(wad_wakeUpDependents, "wad", desc="Wake up any requests waiting for this address") {
     wakeUpBuffers(address);
   }
@@ -1199,18 +1207,18 @@
   }

   // TRANSITIONS
-  transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+  transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
       st_stallAndWaitRequest;
   }

   // It may be possible to save multiple invalidations here!
-  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Atomic, WriteThrough}) {
+  transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {Atomic, WriteThrough}) {
       st_stallAndWaitRequest;
   }

// The exit state is always going to be U, so wakeUpDependents logic should be covered in all the
   // transitions which are flowing into U.
-  transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {DmaRead,DmaWrite}){
+  transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {DmaRead,DmaWrite}){
     sd_stallAndWaitRequest;
   }

@@ -1233,7 +1241,6 @@

   transition(U, DmaWrite, BDW_P) {L3TagArrayRead} {
     atd_allocateTBEforDMA;
-    da_sendResponseDmaAck;
     icd_probeInvCoreDataForDMA;
     pd_popDmaRequestQueue;
   }
@@ -1293,12 +1300,10 @@
     zz_recycleRequestQueue;
   }

-  transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+  transition(BL, CPUData, BL_WM) {L3TagArrayWrite, L3DataArrayWrite} {
     d_writeDataToMemory;
     al_allocateL3Block;
    pr_profileL3HitMiss; //Must come after al_allocateL3Block and before dt_deallocateTBE
-    wad_wakeUpDependents;
-    dt_deallocateTBE;
     pr_popResponseQueue;
   }

@@ -1308,7 +1313,13 @@
     pr_popResponseQueue;
   }

-  transition({B, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
+  transition(BL_WM, WBAck, U) {
+    wad_wakeUpDependents;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition({B, B_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
     z_stall;
   }

@@ -1316,7 +1327,7 @@
     pm_popMemQueue;
   }

-  transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
+  transition({U, BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, StaleVicDirty) {
     rv_removeVicDirtyIgnore;
     w_sendResponseWBAck;
     p_popRequestQueue;
@@ -1376,7 +1387,7 @@
     pm_popMemQueue;
   }

-  transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+  transition(BS_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
     s_sendResponseS;
     wd_writeBackData;
@@ -1385,7 +1396,7 @@
     pm_popMemQueue;
   }

-  transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+  transition(BM_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
     m_sendResponseM;
     wd_writeBackData;
@@ -1394,7 +1405,7 @@
     pm_popMemQueue;
   }

-  transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+  transition(B_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
     es_sendResponseES;
     wd_writeBackData;
@@ -1403,7 +1414,7 @@
     pm_popMemQueue;
   }

-  transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+  transition(BS_M, L3Hit, B_WM) {L3TagArrayWrite, L3DataArrayWrite} {
     s_sendResponseS;
     wd_writeBackData;
     alwt_allocateL3BlockOnWT;
@@ -1411,7 +1422,7 @@
     ptl_popTriggerQueue;
   }

-  transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+  transition(BM_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
     m_sendResponseM;
     wd_writeBackData;
     alwt_allocateL3BlockOnWT;
@@ -1419,7 +1430,7 @@
     ptl_popTriggerQueue;
   }

-  transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+  transition(B_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
     es_sendResponseES;
     wd_writeBackData;
     alwt_allocateL3BlockOnWT;
@@ -1453,12 +1464,18 @@
     pt_popTriggerQueue;
   }

-  transition(BDW_P, ProbeAcksComplete, U) {
+  transition(BDW_P, ProbeAcksComplete, BDW_WM) {
+    wd_writeBackData;
+    da_sendResponseDmaAck;
+    pt_popTriggerQueue;
+  }
+
+  transition(BDW_WM, WBAck, U) {
// Check for pending requests from the core we put to sleep while waiting
     // for a response
     wada_wakeUpAllDependentsAddr;
     dt_deallocateTBE;
-    pt_popTriggerQueue;
+    pm_popMemQueue;
   }

   transition(BDR_Pm, ProbeAcksComplete, U) {
@@ -1470,7 +1487,7 @@
     pt_popTriggerQueue;
   }

-  transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+  transition(BS_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
     s_sendResponseS;
     wd_writeBackData;
@@ -1479,7 +1496,7 @@
     pt_popTriggerQueue;
   }

-  transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+  transition(BM_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
     m_sendResponseM;
     wd_writeBackData;
@@ -1488,7 +1505,7 @@
     pt_popTriggerQueue;
   }

-  transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+  transition(B_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
     es_sendResponseES;
     wd_writeBackData;
@@ -1497,7 +1514,7 @@
     pt_popTriggerQueue;
   }

-  transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
+  transition(BP, ProbeAcksComplete, B_WM){L3TagArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
     c_sendResponseCtoD;
     wd_writeBackData;
@@ -1505,4 +1522,17 @@
     dt_deallocateTBE;
     pt_popTriggerQueue;
   }
+
+  transition(B_WM, WBAck, B) {
+    wada_wakeUpAllDependentsAddr;
+    pm_popMemQueue;
+  }
+
+  transition(B_WM, UnblockWriteThrough) {
+    z_stall;
+  }
+
+  transition(B_WM, CoreUnblock) {
+    su_stallAndWaitRequest;
+  }
 }

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56446
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ic5bd6a8a8881d7df782e0f7eed8be9d873610e04
Gerrit-Change-Number: 56446
Gerrit-PatchSet: 3
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: Jason Lowe-Power <ja...@lowepower.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged