changeset 3d6da8559605 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=3d6da8559605
description:
        Mem: Use cycles to express cache-related latencies

        This patch changes the cache-related latencies from an absolute time
        expressed in Ticks, to a number of cycles that can be scaled with the
        clock period of the caches. Ultimately this patch serves to enable
        future work that involves dynamic frequency scaling. As an immediate
        benefit it also makes it more convenient to specify cache performance
        without implicitly assuming a specific CPU core operating frequency.

        The stat blocked_cycles, which previously counted in ticks, is now
        updated to count in cycles.

        As the timing is now rounded to the clock edges of the cache, there
        are some regressions that change. Plenty of them have very minor
        changes, whereas some regressions with a short run-time are perturbed
        quite significantly. A follow-on patch updates all the statistics for
        the regressions.

diffstat:

 configs/common/Caches.py                     |  28 +++++++++++----
 configs/common/O3_ARM_v7a.py                 |  21 +++++-------
 configs/example/fs.py                        |   3 +-
 src/mem/cache/BaseCache.py                   |  16 ++++++++-
 src/mem/cache/base.cc                        |   4 ++
 src/mem/cache/base.hh                        |  10 ++--
 src/mem/cache/cache.hh                       |   8 ++--
 src/mem/cache/cache_impl.hh                  |  26 +++++++-------
 src/mem/cache/prefetch/Prefetcher.py         |  47 +++++++++++++++++++++++++--
 src/mem/cache/prefetch/base.cc               |   8 ++--
 src/mem/cache/prefetch/base.hh               |   8 ++--
 src/mem/cache/prefetch/ghb.cc                |   2 +-
 src/mem/cache/prefetch/ghb.hh                |   2 +-
 src/mem/cache/prefetch/stride.cc             |   2 +-
 src/mem/cache/prefetch/stride.hh             |   2 +-
 src/mem/cache/prefetch/tagged.cc             |   2 +-
 src/mem/cache/prefetch/tagged.hh             |   2 +-
 src/mem/cache/tags/fa_lru.cc                 |   4 +-
 src/mem/cache/tags/fa_lru.hh                 |   8 ++--
 src/mem/cache/tags/iic.cc                    |  17 ++++-----
 src/mem/cache/tags/iic.hh                    |   8 ++--
 src/mem/cache/tags/lru.cc                    |   6 +-
 src/mem/cache/tags/lru.hh                    |   6 +-
 tests/configs/inorder-timing.py              |   8 ++--
 tests/configs/memtest.py                     |  10 ++--
 tests/configs/o3-timing-checker.py           |   7 ++-
 tests/configs/o3-timing-mp.py                |  12 +++---
 tests/configs/o3-timing.py                   |   7 ++-
 tests/configs/pc-o3-timing.py                |  18 +++++-----
 tests/configs/pc-simple-atomic.py            |  18 +++++-----
 tests/configs/pc-simple-timing.py            |  18 +++++-----
 tests/configs/realview-o3-checker.py         |  14 ++++----
 tests/configs/realview-o3-dual.py            |  18 +++++-----
 tests/configs/realview-o3.py                 |  14 ++++----
 tests/configs/realview-simple-atomic-dual.py |  18 +++++-----
 tests/configs/realview-simple-atomic.py      |  14 ++++----
 tests/configs/realview-simple-timing-dual.py |  18 +++++-----
 tests/configs/realview-simple-timing.py      |  14 ++++----
 tests/configs/simple-atomic-mp.py            |  12 +++---
 tests/configs/simple-timing-mp.py            |  12 +++---
 tests/configs/simple-timing.py               |   7 ++-
 tests/configs/tsunami-inorder.py             |  14 ++++----
 tests/configs/tsunami-o3-dual.py             |  18 +++++-----
 tests/configs/tsunami-o3.py                  |  14 ++++----
 tests/configs/tsunami-simple-atomic-dual.py  |  18 +++++-----
 tests/configs/tsunami-simple-atomic.py       |  14 ++++----
 tests/configs/tsunami-simple-timing-dual.py  |  18 +++++-----
 tests/configs/tsunami-simple-timing.py       |  14 ++++----
 48 files changed, 330 insertions(+), 259 deletions(-)

diffs (truncated from 1885 to 300 lines):

diff -r 4482cfb36c51 -r 3d6da8559605 configs/common/Caches.py
--- a/configs/common/Caches.py  Mon Oct 15 08:10:52 2012 -0400
+++ b/configs/common/Caches.py  Mon Oct 15 08:10:54 2012 -0400
@@ -1,3 +1,15 @@
+# Copyright (c) 2012 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -31,8 +43,8 @@
 class L1Cache(BaseCache):
     assoc = 2
     block_size = 64
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 2
+    response_latency = 2
     mshrs = 10
     tgts_per_mshr = 20
     is_top_level = True
@@ -40,16 +52,16 @@
 class L2Cache(BaseCache):
     assoc = 8
     block_size = 64
-    hit_latency = '10ns'
-    response_latency = '10ns'
+    hit_latency = 20
+    response_latency = 20
     mshrs = 20
     tgts_per_mshr = 12
 
 class PageTableWalkerCache(BaseCache):
     assoc = 2
     block_size = 64
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 2
+    response_latency = 2
     mshrs = 10
     size = '1kB'
     tgts_per_mshr = 12
@@ -58,8 +70,8 @@
 class IOCache(BaseCache):
     assoc = 8
     block_size = 64
-    hit_latency = '10ns'
-    response_latency = '10ns'
+    hit_latency = 50
+    response_latency = 50
     mshrs = 20
     size = '1kB'
     tgts_per_mshr = 12
diff -r 4482cfb36c51 -r 3d6da8559605 configs/common/O3_ARM_v7a.py
--- a/configs/common/O3_ARM_v7a.py      Mon Oct 15 08:10:52 2012 -0400
+++ b/configs/common/O3_ARM_v7a.py      Mon Oct 15 08:10:54 2012 -0400
@@ -145,10 +145,9 @@
     defer_registration= False
 
 # Instruction Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7a_ICache(BaseCache):
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 1
+    response_latency = 1
     block_size = 64
     mshrs = 2
     tgts_per_mshr = 8
@@ -157,10 +156,9 @@
     is_top_level = 'true'
 
 # Data Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7a_DCache(BaseCache):
-    hit_latency = '2ns'
-    response_latency = '2ns'
+    hit_latency = 2
+    response_latency = 2
     block_size = 64
     mshrs = 6
     tgts_per_mshr = 8
@@ -172,8 +170,8 @@
 # TLB Cache 
 # Use a cache as a L2 TLB
 class O3_ARM_v7aWalkCache(BaseCache):
-    hit_latency = '4ns'
-    response_latency = '4ns'
+    hit_latency = 4
+    response_latency = 4
     block_size = 64
     mshrs = 6
     tgts_per_mshr = 8
@@ -184,10 +182,9 @@
 
 
 # L2 Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7aL2(BaseCache):
-    hit_latency = '12ns'
-    response_latency = '12ns'
+    hit_latency = 12
+    response_latency = 12
     block_size = 64
     mshrs = 16
     tgts_per_mshr = 8
@@ -196,5 +193,5 @@
     write_buffers = 8
     prefetch_on_access = 'true'
     # Simple stride prefetcher
-    prefetcher = StridePrefetcher(degree=8, latency='1.0ns')
+    prefetcher = StridePrefetcher(degree=8, latency = 1)
 
diff -r 4482cfb36c51 -r 3d6da8559605 configs/example/fs.py
--- a/configs/example/fs.py     Mon Oct 15 08:10:52 2012 -0400
+++ b/configs/example/fs.py     Mon Oct 15 08:10:54 2012 -0400
@@ -122,7 +122,8 @@
 else:
     mem_size = SysConfig().mem()
 if options.caches or options.l2cache:
-    test_sys.iocache = IOCache(addr_ranges=[test_sys.physmem.range])
+    test_sys.iocache = IOCache(clock = '1GHz',
+                               addr_ranges=[test_sys.physmem.range])
     test_sys.iocache.cpu_side = test_sys.iobus.master
     test_sys.iocache.mem_side = test_sys.membus.slave
 else:
diff -r 4482cfb36c51 -r 3d6da8559605 src/mem/cache/BaseCache.py
--- a/src/mem/cache/BaseCache.py        Mon Oct 15 08:10:52 2012 -0400
+++ b/src/mem/cache/BaseCache.py        Mon Oct 15 08:10:54 2012 -0400
@@ -1,3 +1,15 @@
+# Copyright (c) 2012 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2005-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -36,8 +48,8 @@
     type = 'BaseCache'
     assoc = Param.Int("associativity")
     block_size = Param.Int("block size in bytes")
-    hit_latency = Param.Latency("The hit latency for this cache")
-    response_latency = Param.Latency(
+    hit_latency = Param.Cycles("The hit latency for this cache")
+    response_latency = Param.Cycles(
             "Additional cache latency for the return path to core on a miss");
     hash_delay = Param.Cycles(1, "time in cycles of hash access")
     max_miss_count = Param.Counter(0,
diff -r 4482cfb36c51 -r 3d6da8559605 src/mem/cache/base.cc
--- a/src/mem/cache/base.cc     Mon Oct 15 08:10:52 2012 -0400
+++ b/src/mem/cache/base.cc     Mon Oct 15 08:10:54 2012 -0400
@@ -81,6 +81,10 @@
       addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()),
       system(p->system)
 {
+    // ensure the clock is not running at an unreasonable clock speed
+    if (clock == 1)
+        panic("Cache %s has a cycle time of 1 tick. Specify a clock.\n",
+              name());
 }
 
 void
diff -r 4482cfb36c51 -r 3d6da8559605 src/mem/cache/base.hh
--- a/src/mem/cache/base.hh     Mon Oct 15 08:10:52 2012 -0400
+++ b/src/mem/cache/base.hh     Mon Oct 15 08:10:54 2012 -0400
@@ -229,7 +229,7 @@
     /**
      * The latency of a hit in this device.
      */
-    const Tick hitLatency;
+    const Cycles hitLatency;
 
     /**
      * The latency of sending reponse to its upper level cache/core on a
@@ -237,7 +237,7 @@
      * miss is much quicker that the hit latency. The responseLatency parameter
      * tries to capture this latency.
      */
-    const Tick responseLatency;
+    const Cycles responseLatency;
 
     /** The number of targets for each MSHR. */
     const int numTarget;
@@ -260,7 +260,7 @@
     uint64_t order;
 
     /** Stores time the cache blocked for statistics. */
-    Tick blockedCycle;
+    Cycles blockedCycle;
 
     /** Pointer to the MSHR that has no targets. */
     MSHR *noTargetMSHR;
@@ -492,7 +492,7 @@
         uint8_t flag = 1 << cause;
         if (blocked == 0) {
             blocked_causes[cause]++;
-            blockedCycle = curTick();
+            blockedCycle = curCycle();
             cpuSidePort->setBlocked();
         }
         blocked |= flag;
@@ -512,7 +512,7 @@
         blocked &= ~flag;
         DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
         if (blocked == 0) {
-            blocked_cycles[cause] += curTick() - blockedCycle;
+            blocked_cycles[cause] += curCycle() - blockedCycle;
             cpuSidePort->clearBlocked();
         }
     }
diff -r 4482cfb36c51 -r 3d6da8559605 src/mem/cache/cache.hh
--- a/src/mem/cache/cache.hh    Mon Oct 15 08:10:52 2012 -0400
+++ b/src/mem/cache/cache.hh    Mon Oct 15 08:10:54 2012 -0400
@@ -206,7 +206,7 @@
      * @return Boolean indicating whether the request was satisfied.
      */
     bool access(PacketPtr pkt, BlkType *&blk,
-                int &lat, PacketList &writebacks);
+                Cycles &lat, PacketList &writebacks);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
@@ -272,7 +272,7 @@
     /**
      * Performs the access specified by the request.
      * @param pkt The request to perform.
-     * @return The result of the access.
+     * @return The number of ticks required for the access.
      */
     Tick atomicAccess(PacketPtr pkt);
 
@@ -299,9 +299,9 @@
      * Snoop for the provided request in the cache and return the estimated
      * time of completion.
      * @param pkt The memory request to snoop
-     * @return The estimated completion time.
+     * @return The number of cycles required for the snoop.
      */
-    Tick snoopAtomic(PacketPtr pkt);
+    Cycles snoopAtomic(PacketPtr pkt);
 
     /**
      * Squash all requests associated with specified thread.
diff -r 4482cfb36c51 -r 3d6da8559605 src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh       Mon Oct 15 08:10:52 2012 -0400
+++ b/src/mem/cache/cache_impl.hh       Mon Oct 15 08:10:54 2012 -0400
@@ -275,7 +275,7 @@
 template<class TagStore>
 bool
 Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
-                        int &lat, PacketList &writebacks)
+                        Cycles &lat, PacketList &writebacks)
 {
     if (pkt->req->isUncacheable()) {
         if (pkt->req->isClearLL()) {
@@ -392,7 +392,7 @@
     pendingDelete.clear();
 
     // we charge hitLatency for doing just about anything here
-    Tick time =  curTick() + hitLatency;
+    Tick time = clockEdge(hitLatency);
 
     if (pkt->isResponse()) {
         // must be cache-to-cache response from upper to lower level
@@ -463,7 +463,7 @@
         return true;
     }
 
-    int lat = hitLatency;
+    Cycles lat = hitLatency;
     BlkType *blk = NULL;
     PacketList writebacks;
 
@@ -505,7 +505,7 @@
 
         if (needsResponse) {
             pkt->makeTimingResponse();
-            cpuSidePort->schedTimingResp(pkt, curTick()+lat);
+            cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
         } else {
             /// @todo nominally we should just delete the packet here,
             /// however, until 4-phase stuff we can't because sending
@@ -637,7 +637,7 @@
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to