Huang Jiasen has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/53103 )
Change subject: mem: Support stats for Cache hitLatency
......................................................................
mem: Support stats for Cache hitLatency
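
Add a hitLatency vector to CacheCmdStats, accumulated per requestor,
together with demandHitLatency and overallHitLatency formulas. The L1
instruction and data caches charge the access latency of a hitting
request in recvTimingReq(); the L2 instead measures the remaining
response latency in handleTimingReqHit() and sends a functional snoop,
marked with the new Packet::probeUpXBar flag, up through the coherent
crossbar so that the crossbar response latency is included as well.
The cache level is derived from the SimObject name via getCacheType(),
and hit counting moves out of access() into recvTimingReq().

A per-requestor average could later be derived from the new vectors in
the same way as the existing avgMissLatency formulas; minimal sketch
(the avg stat name below is hypothetical and not part of this change):

    demandAvgHitLatency.flags(total | nozero | nonan);
    demandAvgHitLatency = demandHitLatency / demandHits;
    for (int i = 0; i < max_requestors; i++) {
        demandAvgHitLatency.subname(i, system->getRequestorName(i));
    }
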
Change-Id: I18de75d3b6c4ce1784b90653e2d132ffecf1b1af
---
M src/mem/xbar.cc
M src/mem/cache/base.cc
M src/mem/cache/base.hh
M src/mem/coherent_xbar.cc
M src/mem/packet.hh
M src/mem/port.hh
6 files changed, 115 insertions(+), 7 deletions(-)
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index f97c30a..fb450ef 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -112,6 +112,8 @@
system(p.system),
stats(*this)
{
+ cache_type_ = getCacheType(p.name);
+
// the MSHR queue has no reserve entries as we check the MSHR
// queue on every single allocation, whereas the write queue has
// as many reserve entries as we have MSHRs, since every MSHR may
@@ -225,8 +227,24 @@
{
if (pkt->needsResponse()) {
// These delays should have been consumed by now
- assert(pkt->headerDelay == 0);
- assert(pkt->payloadDelay == 0);
+ // assert(pkt->headerDelay == 0);
+ // assert(pkt->payloadDelay == 0);
+
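+ // Repurpose headerDelay to carry the hit latency seen so far, i.e.
+ // the time from now until the response is scheduled. For the L2, a
+ // functional snoop is sent up the CPU-side port so that the coherent
+ // crossbar adds its response latency before the value is recorded.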
+ pkt->headerDelay = request_time - curTick();
+ if (!cache_type_.compare("L2C")) {
+ pkt->probeUpXBar = true;
+ uint32_t orig_pkt_header_delay = pkt->headerDelay;
+ cpuSidePort.sendFunctionalSnoop(pkt);
+ DPRINTF(Cache, "BaseCache::%s pkt is %s "
+ "orig pkt->headerDelay = %dC "
+ "updated pkt->headerDelay = %dC\n",
+ __func__,
+ pkt->print(),
+ ticksToCycles(orig_pkt_header_delay),
+ ticksToCycles(pkt->headerDelay));
+ stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] +=
+ ticksToCycles(pkt->headerDelay);
+ }
pkt->makeTimingResponse();
@@ -388,6 +406,11 @@
blk->clearPrefetched();
}
+ incHitCount(pkt);
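+ // Caches other than the L2 charge the access latency computed for
+ // this lookup; the L2 records its hit latency in handleTimingReqHit()
+ // via the crossbar probe.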
+ if (cache_type_.compare("L2C")) {
+ stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] += lat;
+ }
+
handleTimingReqHit(pkt, blk, request_time);
} else {
handleTimingReqMiss(pkt, blk, forward_time, request_time);
@@ -1292,7 +1315,6 @@
updateBlockData(blk, pkt, has_old_data);
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
- incHitCount(pkt);
// When the packet metadata arrives, the tag lookup will be done while
// the payload is arriving. Then the block will be ready to access as
@@ -1368,8 +1390,6 @@
updateBlockData(blk, pkt, has_old_data);
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
- incHitCount(pkt);
-
// When the packet metadata arrives, the tag lookup will be done while
// the payload is arriving. Then the block will be ready to access as
// soon as the fill is done
@@ -1381,8 +1401,6 @@
} else if (blk && (pkt->needsWritable() ?
blk->isSet(CacheBlk::WritableBit) :
blk->isSet(CacheBlk::ReadableBit))) {
- // OK to satisfy access
- incHitCount(pkt);
// Calculate access latency based on the need to access the data array
if (pkt->isRead()) {
@@ -1954,6 +1972,8 @@
("number of " + name + " hits").c_str()),
ADD_STAT(misses, statistics::units::Count::get(),
("number of " + name + " misses").c_str()),
+ ADD_STAT(hitLatency, statistics::units::Tick::get(),
+ ("number of " + name + " hit ticks").c_str()),
ADD_STAT(missLatency, statistics::units::Tick::get(),
("number of " + name + " miss ticks").c_str()),
ADD_STAT(accesses, statistics::units::Count::get(),
@@ -2010,6 +2030,15 @@
misses.subname(i, system->getRequestorName(i));
}
+ // Hit latency statistics
+ hitLatency
+ .init(max_requestors)
+ .flags(total | nozero | nonan)
+ ;
+ for (int i = 0; i < max_requestors; i++) {
+ hitLatency.subname(i, system->getRequestorName(i));
+ }
+
// Miss latency statistics
missLatency
.init(max_requestors)
@@ -2116,6 +2145,10 @@
"number of demand (read+write) hits"),
ADD_STAT(overallHits, statistics::units::Count::get(),
"number of overall hits"),
+ ADD_STAT(demandHitLatency, statistics::units::Tick::get(),
+ "number of demand (read+write) hit ticks"),
+ ADD_STAT(overallHitLatency, statistics::units::Tick::get(),
+ "number of overall hit ticks"),
ADD_STAT(demandMisses, statistics::units::Count::get(),
"number of demand (read+write) misses"),
ADD_STAT(overallMisses, statistics::units::Count::get(),
@@ -2250,6 +2283,17 @@
overallMissLatency.subname(i, system->getRequestorName(i));
}
+ demandHitLatency.flags(total | nozero | nonan);
+ demandHitLatency = SUM_DEMAND(hitLatency);
+ for (int i = 0; i < max_requestors; i++) {
+ demandHitLatency.subname(i, system->getRequestorName(i));
+ }
+ overallHitLatency.flags(total | nozero | nonan);
+ overallHitLatency = demandHitLatency + SUM_NON_DEMAND(hitLatency);
+ for (int i = 0; i < max_requestors; i++) {
+ overallHitLatency.subname(i, system->getRequestorName(i));
+ }
+
demandAccesses.flags(total | nozero | nonan);
demandAccesses = demandHits + demandMisses;
for (int i = 0; i < max_requestors; i++) {
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 988a678..a9e64d9 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -96,6 +96,11 @@
{
protected:
/**
+ * Identify current cache level
+ */
+ std::string cache_type_;
+
+ /**
* Indexes to enumerate the MSHR queues.
*/
enum MSHRQueueIndex
@@ -1006,6 +1011,11 @@
@sa Packet::Command */
statistics::Vector misses;
/**
+ * Total number of cycles per thread/command spent waiting for a hit.
+ * Used to calculate the average hit latency.
+ */
+ statistics::Vector hitLatency;
+ /**
* Total number of cycles per thread/command spent waiting for a miss.
* Used to calculate the average miss latency.
*/
@@ -1050,6 +1060,10 @@
statistics::Formula demandHits;
/** Number of hit for all accesses. */
statistics::Formula overallHits;
+ /** Total number of cycles spent waiting for demand hits. */
+ statistics::Formula demandHitLatency;
+ /** Total number of cycles spent waiting for all hits. */
+ statistics::Formula overallHitLatency;
/** Number of misses for demand accesses. */
statistics::Formula demandMisses;
@@ -1159,6 +1173,13 @@
return blkSize;
}
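+ // Map the cache's SimObject name to a level tag; note that this
+ // relies on the object names used by the default configs.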
+ std::string getCacheType(const std::string& port_name) {
+ if (!port_name.compare("system.cpu.icache")) { return "I$"; }
+ else if (!port_name.compare("system.cpu.dcache")) { return "D$"; }
+ else if (!port_name.compare("system.l2")) { return "L2C"; }
+ else { return "Invalid $"; }
+ }
+
const AddrRangeList &getAddrRanges() const { return addrRanges; }
MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index 7d1cd5d..d7b70e5 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -1000,6 +1000,22 @@
void
CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
{
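+ // A hit-latency probe from a cache: add this crossbar's response
+ // latency to the packet's headerDelay and return without forwarding
+ // the packet.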
+ if (pkt->probeUpXBar) {
+ Tick xbar_delay = responseLatency * clockPeriod();
+ uint32_t old_pkt_header_delay = pkt->headerDelay;
+ calcPacketTiming(pkt, xbar_delay);
+ pkt->probeUpXBar = false;
+ DPRINTF(CoherentXBar, "CoherentXBar::%s "
+ "pkt->headerDelay: %dC "
+ "= old headerDelay: %d + "
+ "xbar response_latency: %dC\n",
+ __func__,
+ ticksToCycles(pkt->headerDelay),
+ ticksToCycles(old_pkt_header_delay),
+ ticksToCycles(xbar_delay));
+ return;
+ }
+
if (!pkt->isPrint()) {
// don't do DPRINTFs on PrintReq as it clutters up the output
DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 88995f1..140b9a5 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -411,6 +411,12 @@
public:
/**
+ * Flag indicating cache is snooping a possible upper-level XBar
+ * in functional mode.
+ */
+ bool probeUpXBar;
+
+ /**
* The extra delay from seeing the packet until the header is
* transmitted. This delay is used to communicate the crossbar
* forwarding latency to the neighbouring object (e.g. a cache)
@@ -857,6 +863,8 @@
headerDelay(0), snoopDelay(0),
payloadDelay(0), senderState(NULL)
{
+ probeUpXBar = false;
+
flags.clear();
if (req->hasPaddr()) {
addr = req->getPaddr();
@@ -898,6 +906,8 @@
headerDelay(0),
snoopDelay(0), payloadDelay(0), senderState(NULL)
{
+ probeUpXBar = false;
+
flags.clear();
if (req->hasPaddr()) {
addr = req->getPaddr() & ~(_blkSize - 1);
@@ -928,6 +938,8 @@
payloadDelay(pkt->payloadDelay),
senderState(pkt->senderState)
{
+ probeUpXBar = false;
+
if (!clear_flags)
flags.set(pkt->flags & COPY_FLAGS);
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 33ff117..090ff7f 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -232,6 +232,8 @@
void
recvFunctionalSnoop(PacketPtr pkt) override
{
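+ // Hit-latency probes may reach ports that do not implement
+ // functional snooping; drop them silently instead of panicking.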
+ if (pkt->probeUpXBar) return;
+
panic("%s was not expecting a functional snoop request\n", name());
}
diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc
index e1b2a8b..81ab809 100644
--- a/src/mem/xbar.cc
+++ b/src/mem/xbar.cc
@@ -114,6 +114,10 @@
// value
pkt->headerDelay += offset + header_delay;
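+ // A hit-latency probe only needs the header delay accumulated above;
+ // skip the rest of the timing calculation.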
+ if (pkt->probeUpXBar) {
+ return;
+ }
+
// note that we add the header delay to the existing value, and
// align it to the crossbar clock
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53103
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I18de75d3b6c4ce1784b90653e2d132ffecf1b1af
Gerrit-Change-Number: 53103
Gerrit-PatchSet: 1
Gerrit-Owner: Huang Jiasen <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]