[gem5-dev] Change in gem5/gem5[develop]: mem: Support stats for Cache hitLatency

Huang Jiasen (Gerrit) via gem5-dev Mon, 22 Nov 2021 21:46:03 -0800

Huang Jiasen has uploaded this change for review. (https://gem5-review.googlesource.com/c/public/gem5/+/53103 )


Change subject: mem: Support stats for Cache hitLatency
......................................................................

mem: Support stats for Cache hitLatency

Change-Id: I18de75d3b6c4ce1784b90653e2d132ffecf1b1af
---
M src/mem/xbar.cc
M src/mem/cache/base.cc
M src/mem/cache/base.hh
M src/mem/coherent_xbar.cc
M src/mem/packet.hh
M src/mem/port.hh
6 files changed, 115 insertions(+), 7 deletions(-)



diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index f97c30a..fb450ef 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -112,6 +112,8 @@
       system(p.system),
       stats(*this)
 {
+    cache_type_ = getCacheType(p.name);
+
     // the MSHR queue has no reserve entries as we check the MSHR
     // queue on every single allocation, whereas the write queue has
     // as many reserve entries as we have MSHRs, since every MSHR may
@@ -225,8 +227,24 @@
 {
     if (pkt->needsResponse()) {
         // These delays should have been consumed by now
-        assert(pkt->headerDelay == 0);
-        assert(pkt->payloadDelay == 0);
+        // assert(pkt->headerDelay == 0);
+        // assert(pkt->payloadDelay == 0);
+
+        pkt->headerDelay = request_time - curTick();
+        if (!cache_type_.compare("L2C")) {
+            pkt->probeUpXBar = true;
+            uint32_t orig_pkt_header_delay = pkt->headerDelay;
+            cpuSidePort.sendFunctionalSnoop(pkt);
+            DPRINTF(Cache, "BaseCache::%s pkt is %s "
+                            "orig pkt->headerDelay = %dC "
+                            "updated pkt->headerDelay = %dC\n",
+                            __func__,
+                            pkt->print(),
+                            ticksToCycles(orig_pkt_header_delay),
+                            ticksToCycles(pkt->headerDelay));
+            stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] +=
+                    ticksToCycles(pkt->headerDelay);
+        }

         pkt->makeTimingResponse();

@@ -388,6 +406,11 @@
             blk->clearPrefetched();
         }

+        incHitCount(pkt);
+        if (cache_type_.compare("L2C")) {
+            stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] += lat;
+        }
+
         handleTimingReqHit(pkt, blk, request_time);
     } else {
         handleTimingReqMiss(pkt, blk, forward_time, request_time);
@@ -1292,7 +1315,6 @@

         updateBlockData(blk, pkt, has_old_data);
         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
-        incHitCount(pkt);

         // When the packet metadata arrives, the tag lookup will be done while
         // the payload is arriving. Then the block will be ready to access as

@@ -1368,8 +1390,6 @@
         updateBlockData(blk, pkt, has_old_data);
         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());

-        incHitCount(pkt);
-

         // When the packet metadata arrives, the tag lookup will be done while
         // the payload is arriving. Then the block will be ready to access as

         // soon as the fill is done
@@ -1381,8 +1401,6 @@
     } else if (blk && (pkt->needsWritable() ?
             blk->isSet(CacheBlk::WritableBit) :
             blk->isSet(CacheBlk::ReadableBit))) {
-        // OK to satisfy access
-        incHitCount(pkt);

         // Calculate access latency based on the need to access the data array

         if (pkt->isRead()) {
@@ -1954,6 +1972,8 @@
                ("number of " + name + " hits").c_str()),
       ADD_STAT(misses, statistics::units::Count::get(),
                ("number of " + name + " misses").c_str()),
+      ADD_STAT(hitLatency, statistics::units::Tick::get(),
+               ("number of " + name + " hit ticks").c_str()),
       ADD_STAT(missLatency, statistics::units::Tick::get(),
                ("number of " + name + " miss ticks").c_str()),
       ADD_STAT(accesses, statistics::units::Count::get(),
@@ -2010,6 +2030,15 @@
         misses.subname(i, system->getRequestorName(i));
     }

+    // Hit latency statistics
+    hitLatency
+        .init(max_requestors)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_requestors; i++) {
+        hitLatency.subname(i, system->getRequestorName(i));
+    }
+
     // Miss latency statistics
     missLatency
         .init(max_requestors)
@@ -2116,6 +2145,10 @@
              "number of demand (read+write) hits"),
     ADD_STAT(overallHits, statistics::units::Count::get(),
              "number of overall hits"),
+    ADD_STAT(demandHitLatency, statistics::units::Tick::get(),
+             "number of demand (read+write) hit ticks"),
+    ADD_STAT(overallHitLatency, statistics::units::Tick::get(),
+            "number of overall hit ticks"),
     ADD_STAT(demandMisses, statistics::units::Count::get(),
              "number of demand (read+write) misses"),
     ADD_STAT(overallMisses, statistics::units::Count::get(),
@@ -2250,6 +2283,17 @@
         overallMissLatency.subname(i, system->getRequestorName(i));
     }

+    demandHitLatency.flags(total | nozero | nonan);
+    demandHitLatency = SUM_DEMAND(hitLatency);
+    for (int i = 0; i < max_requestors; i++) {
+        demandHitLatency.subname(i, system->getRequestorName(i));
+    }
+    overallHitLatency.flags(total | nozero | nonan);
+    overallHitLatency = demandHitLatency + SUM_NON_DEMAND(hitLatency);
+    for (int i = 0; i < max_requestors; i++) {
+        overallHitLatency.subname(i, system->getRequestorName(i));
+    }
+
     demandAccesses.flags(total | nozero | nonan);
     demandAccesses = demandHits + demandMisses;
     for (int i = 0; i < max_requestors; i++) {
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 988a678..a9e64d9 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -96,6 +96,11 @@
 {
   protected:
     /**
+     * Identify current cache level
+     */
+    std::string cache_type_;
+
+    /**
      * Indexes to enumerate the MSHR queues.
      */
     enum MSHRQueueIndex
@@ -1006,6 +1011,11 @@
             @sa Packet::Command */
         statistics::Vector misses;
         /**

+         * Total number of cycles per thread/command spent waiting for a hit.

+         * Used to calculate the average hit latency.
+         */
+        statistics::Vector hitLatency;
+        /**

          * Total number of cycles per thread/command spent waiting for a miss.

          * Used to calculate the average miss latency.
          */
@@ -1050,6 +1060,10 @@
         statistics::Formula demandHits;
         /** Number of hit for all accesses. */
         statistics::Formula overallHits;
+        /** Total number of cycles spent waiting for demand hits. */
+        statistics::Formula demandHitLatency;
+        /** Total number of cycles spent waiting for all hits. */
+        statistics::Formula overallHitLatency;

         /** Number of misses for demand accesses. */
         statistics::Formula demandMisses;
@@ -1159,6 +1173,13 @@
         return blkSize;
     }

+    std::string getCacheType(const std::string& port_name) {
+        if (!port_name.compare("system.cpu.icache")) { return "I$"; }
+        else if (!port_name.compare("system.cpu.dcache")) { return "D$"; }
+        else if (!port_name.compare("system.l2")) { return "L2C"; }
+        else { return "Invalid $"; }
+    }
+
     const AddrRangeList &getAddrRanges() const { return addrRanges; }

     MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)

diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index 7d1cd5d..d7b70e5 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -1000,6 +1000,22 @@
 void
 CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
 {
+    if (pkt->probeUpXBar) {
+        Tick xbar_delay = responseLatency * clockPeriod();
+        uint32_t old_pkt_header_delay = pkt->headerDelay;
+        calcPacketTiming(pkt, xbar_delay);
+        pkt->probeUpXBar = false;
+        DPRINTF(CoherentXBar, "CoherentXBar::%s "
+                            "pkt->headerDelay: %dC "
+                            "= old headerDelay: %d + "
+                            "xbar response_latency: %dC\n",
+                            __func__,
+                            ticksToCycles(pkt->headerDelay),
+                            ticksToCycles(old_pkt_header_delay),
+                            ticksToCycles(xbar_delay));
+        return;
+    }
+
     if (!pkt->isPrint()) {
         // don't do DPRINTFs on PrintReq as it clutters up the output
         DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 88995f1..140b9a5 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -411,6 +411,12 @@
   public:

     /**
+     * Flag indicating cache is snooping a possible upper-level XBar
+     * in functional mode.
+     */
+    bool probeUpXBar;
+
+    /**
      * The extra delay from seeing the packet until the header is
      * transmitted. This delay is used to communicate the crossbar
      * forwarding latency to the neighbouring object (e.g. a cache)
@@ -857,6 +863,8 @@
            headerDelay(0), snoopDelay(0),
            payloadDelay(0), senderState(NULL)
     {
+        probeUpXBar = false;
+
         flags.clear();
         if (req->hasPaddr()) {
             addr = req->getPaddr();
@@ -898,6 +906,8 @@
            headerDelay(0),
            snoopDelay(0), payloadDelay(0), senderState(NULL)
     {
+        probeUpXBar = false;
+
         flags.clear();
         if (req->hasPaddr()) {
             addr = req->getPaddr() & ~(_blkSize - 1);
@@ -928,6 +938,8 @@
            payloadDelay(pkt->payloadDelay),
            senderState(pkt->senderState)
     {
+        probeUpXBar = false;
+
         if (!clear_flags)
             flags.set(pkt->flags & COPY_FLAGS);

diff --git a/src/mem/port.hh b/src/mem/port.hh
index 33ff117..090ff7f 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -232,6 +232,8 @@
     void
     recvFunctionalSnoop(PacketPtr pkt) override
     {
+        if (pkt->probeUpXBar) return;
+
         panic("%s was not expecting a functional snoop request\n", name());
     }

diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc
index e1b2a8b..81ab809 100644
--- a/src/mem/xbar.cc
+++ b/src/mem/xbar.cc
@@ -114,6 +114,10 @@
     // value
     pkt->headerDelay += offset + header_delay;

+    if (pkt->probeUpXBar) {
+        return;
+    }
+
     // note that we add the header delay to the existing value, and
     // align it to the crossbar clock


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53103

To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I18de75d3b6c4ce1784b90653e2d132ffecf1b1af
Gerrit-Change-Number: 53103
Gerrit-PatchSet: 1
Gerrit-Owner: Huang Jiasen <[email protected]>
Gerrit-MessageType: newchange

_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: mem: Support stats for Cache hitLatency

Reply via email to