changeset a80884911971 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=a80884911971
description:
        cpu: Fix LLSC atomic CPU wakeup

        Writes to locked memory addresses (LLSC) did not wake up the locking
        CPU. This can lead to deadlocks on multi-core runs. In AtomicSimpleCPU,
        recvAtomicSnoop was checking if the incoming packet was an invalidation
        (isInvalidate) and only then handled a locked snoop. However, writes are
        seen instead of invalidations when running without caches (fast-forward
        configurations). As a simple fix, handleLockedSnoop is now also called
        when the incoming snoop packet is a write.

diffstat:

 src/cpu/minor/lsq.cc                                                           
    |     4 +-
 src/cpu/o3/lsq_unit_impl.hh                                                    
    |     6 +-
 src/cpu/simple/atomic.cc                                                       
    |     5 +-
 src/cpu/simple/timing.cc                                                       
    |    10 +-
 
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
 |     8 +-
 tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout 
    |     8 +-
 
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
  |  2133 ++--
 
tests/quick/fs/10.linux-boot/ref/arm/linux/realview-simple-timing-dual/stats.txt
   |  4390 ++++-----
 8 files changed, 3272 insertions(+), 3292 deletions(-)

diffs (truncated from 7668 to 300 lines):

diff -r 46c7b3e35720 -r a80884911971 src/cpu/minor/lsq.cc
--- a/src/cpu/minor/lsq.cc      Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/minor/lsq.cc      Sun Jul 19 15:03:30 2015 -0500
@@ -1617,7 +1617,9 @@
      * this action on snoops. */
 
     /* THREAD */
-    TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+    if (pkt->isInvalidate() || pkt->isWrite()) {
+        TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+    }
 }
 
 }
diff -r 46c7b3e35720 -r a80884911971 src/cpu/o3/lsq_unit_impl.hh
--- a/src/cpu/o3/lsq_unit_impl.hh       Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/o3/lsq_unit_impl.hh       Sun Jul 19 15:03:30 2015 -0500
@@ -438,10 +438,8 @@
     int load_idx = loadHead;
     DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
 
-    // Unlock the cpu-local monitor when the CPU sees a snoop to a locked
-    // address. The CPU can speculatively execute a LL operation after a 
pending
-    // SC operation in the pipeline and that can make the cache monitor the CPU
-    // is connected to valid while it really shouldn't be.
+    // Only Invalidate packet calls checkSnoop
+    assert(pkt->isInvalidate());
     for (int x = 0; x < cpu->numContexts(); x++) {
         ThreadContext *tc = cpu->getContext(x);
         bool no_squash = cpu->thread[x]->noSquashFromTC;
diff -r 46c7b3e35720 -r a80884911971 src/cpu/simple/atomic.cc
--- a/src/cpu/simple/atomic.cc  Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/simple/atomic.cc  Sun Jul 19 15:03:30 2015 -0500
@@ -292,7 +292,10 @@
     }
 
     // if snoop invalidates, release any associated locks
-    if (pkt->isInvalidate()) {
+    // When run without caches, Invalidation packets will not be received
+    // hence we must check if the incoming packets are writes and wakeup
+    // the processor accordingly
+    if (pkt->isInvalidate() || pkt->isWrite()) {
         DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                 pkt->getAddr());
         for (auto &t_info : cpu->threadInfo) {
diff -r 46c7b3e35720 -r a80884911971 src/cpu/simple/timing.cc
--- a/src/cpu/simple/timing.cc  Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/simple/timing.cc  Sun Jul 19 15:03:30 2015 -0500
@@ -876,8 +876,14 @@
         }
     }
 
-    for (auto &t_info : cpu->threadInfo) {
-        TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+    // Making it uniform across all CPUs:
+    // The CPUs need to be woken up only on an invalidation packet (when using 
caches)
+    // or on an incoming write packet (when not using caches)
+    // It is not necessary to wake up the processor on all incoming packets
+    if (pkt->isInvalidate() || pkt->isWrite()) {
+        for (auto &t_info : cpu->threadInfo) {
+            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+        }
     }
 }
 
diff -r 46c7b3e35720 -r a80884911971 
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
--- 
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
        Mon Feb 29 19:13:13 2016 -0600
+++ 
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
        Sun Jul 19 15:03:30 2015 -0500
@@ -104,7 +104,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=true
 hit_latency=2
 is_read_only=false
 max_miss_count=0
@@ -146,7 +145,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=true
 hit_latency=2
 is_read_only=true
 max_miss_count=0
@@ -234,7 +232,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=true
 hit_latency=2
 is_read_only=false
 max_miss_count=0
@@ -276,7 +273,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=true
 hit_latency=2
 is_read_only=true
 max_miss_count=0
@@ -410,7 +406,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=false
 hit_latency=50
 is_read_only=false
 max_miss_count=0
@@ -447,7 +442,6 @@
 clusivity=mostly_incl
 demand_mshr_reserve=1
 eventq_index=0
-forward_snoops=true
 hit_latency=20
 is_read_only=false
 max_miss_count=0
@@ -482,6 +476,7 @@
 eventq_index=0
 forward_latency=4
 frontend_latency=3
+point_of_coherency=true
 response_latency=2
 snoop_filter=Null
 snoop_response_latency=4
@@ -615,6 +610,7 @@
 eventq_index=0
 forward_latency=0
 frontend_latency=1
+point_of_coherency=false
 response_latency=1
 snoop_filter=system.toL2Bus.snoop_filter
 snoop_response_latency=1
diff -r 46c7b3e35720 -r a80884911971 
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
--- 
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
    Mon Feb 29 19:13:13 2016 -0600
+++ 
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
    Sun Jul 19 15:03:30 2015 -0500
@@ -1,9 +1,9 @@
 gem5 Simulator System.  http://gem5.org
 gem5 is copyrighted software; use the --copyright option for details.
 
-gem5 compiled Jan 21 2016 13:49:21
-gem5 started Jan 21 2016 13:50:00
-gem5 executing on zizzer, pid 33973
+gem5 compiled Feb 29 2016 18:59:12
+gem5 started Feb 29 2016 18:59:20
+gem5 executing on redacted.arm.com, pid 18325
 command line: build/ALPHA/gem5.opt -d 
build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
 -re /z/atgutier/gem5/gem5-commit/tests/run.py 
build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
 
 Global frequency set at 1000000000000 ticks per second
@@ -11,4 +11,4 @@
       0: system.tsunami.io.rtc: Real-time clock set to Thu Jan  1 00:00:00 2009
 info: Entering event queue @ 0.  Starting simulation...
 info: Launching CPU 1 @ 881785000
-Exiting @ tick 1982594146000 because m5_exit instruction encountered
+Exiting @ tick 1982592736000 because m5_exit instruction encountered
diff -r 46c7b3e35720 -r a80884911971 
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
--- 
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
 Mon Feb 29 19:13:13 2016 -0600
+++ 
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
 Sun Jul 19 15:03:30 2015 -0500
@@ -1,79 +1,79 @@
 
 ---------- Begin Simulation Statistics ----------
 sim_seconds                                  1.982593                       # 
Number of seconds simulated
-sim_ticks                                1982593132000                       # 
Number of ticks simulated
-final_tick                               1982593132000                       # 
Number of ticks from beginning of simulation (restored from checkpoints and 
never reset)
+sim_ticks                                1982592736000                       # 
Number of ticks simulated
+final_tick                               1982592736000                       # 
Number of ticks from beginning of simulation (restored from checkpoints and 
never reset)
 sim_freq                                 1000000000000                       # 
Frequency of simulated ticks
-host_inst_rate                                1109655                       # 
Simulator instruction rate (inst/s)
-host_op_rate                                  1109654                       # 
Simulator op (including micro ops) rate (op/s)
-host_tick_rate                            36063876778                       # 
Simulator tick rate (ticks/s)
-host_mem_usage                                 333984                       # 
Number of bytes of host memory used
-host_seconds                                    54.97                       # 
Real time elapsed on the host
-sim_insts                                    61002651                       # 
Number of instructions simulated
-sim_ops                                      61002651                       # 
Number of ops (including micro ops) simulated
+host_inst_rate                                 753764                       # 
Simulator instruction rate (inst/s)
+host_op_rate                                   753764                       # 
Simulator op (including micro ops) rate (op/s)
+host_tick_rate                            24497172234                       # 
Simulator tick rate (ticks/s)
+host_mem_usage                                 320072                       # 
Number of bytes of host memory used
+host_seconds                                    80.93                       # 
Real time elapsed on the host
+sim_insts                                    61003209                       # 
Number of instructions simulated
+sim_ops                                      61003209                       # 
Number of ops (including micro ops) simulated
 system.voltage_domain.voltage                       1                       # 
Voltage in Volts
 system.clk_domain.clock                          1000                       # 
Clock period in ticks
-system.physmem.bytes_read::cpu0.inst           800256                       # 
Number of bytes read from this memory
-system.physmem.bytes_read::cpu0.data         24686464                       # 
Number of bytes read from this memory
-system.physmem.bytes_read::cpu1.inst            59392                       # 
Number of bytes read from this memory
-system.physmem.bytes_read::cpu1.data           523264                       # 
Number of bytes read from this memory
+system.physmem.bytes_read::cpu0.inst           800192                       # 
Number of bytes read from this memory
+system.physmem.bytes_read::cpu0.data         24686016                       # 
Number of bytes read from this memory
+system.physmem.bytes_read::cpu1.inst            59328                       # 
Number of bytes read from this memory
+system.physmem.bytes_read::cpu1.data           523328                       # 
Number of bytes read from this memory
 system.physmem.bytes_read::tsunami.ide            960                       # 
Number of bytes read from this memory
-system.physmem.bytes_read::total             26070336                       # 
Number of bytes read from this memory
-system.physmem.bytes_inst_read::cpu0.inst       800256                       # 
Number of instructions bytes read from this memory
-system.physmem.bytes_inst_read::cpu1.inst        59392                       # 
Number of instructions bytes read from this memory
-system.physmem.bytes_inst_read::total          859648                       # 
Number of instructions bytes read from this memory
-system.physmem.bytes_written::writebacks      7739904                       # 
Number of bytes written to this memory
-system.physmem.bytes_written::total           7739904                       # 
Number of bytes written to this memory
-system.physmem.num_reads::cpu0.inst             12504                       # 
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu0.data            385726                       # 
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu1.inst               928                       # 
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu1.data              8176                       # 
Number of read requests responded to by this memory
+system.physmem.bytes_read::total             26069824                       # 
Number of bytes read from this memory
+system.physmem.bytes_inst_read::cpu0.inst       800192                       # 
Number of instructions bytes read from this memory
+system.physmem.bytes_inst_read::cpu1.inst        59328                       # 
Number of instructions bytes read from this memory
+system.physmem.bytes_inst_read::total          859520                       # 
Number of instructions bytes read from this memory
+system.physmem.bytes_written::writebacks      7739392                       # 
Number of bytes written to this memory
+system.physmem.bytes_written::total           7739392                       # 
Number of bytes written to this memory
+system.physmem.num_reads::cpu0.inst             12503                       # 
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu0.data            385719                       # 
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu1.inst               927                       # 
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu1.data              8177                       # 
Number of read requests responded to by this memory
 system.physmem.num_reads::tsunami.ide              15                       # 
Number of read requests responded to by this memory
-system.physmem.num_reads::total                407349                       # 
Number of read requests responded to by this memory
-system.physmem.num_writes::writebacks          120936                       # 
Number of write requests responded to by this memory
-system.physmem.num_writes::total               120936                       # 
Number of write requests responded to by this memory
-system.physmem.bw_read::cpu0.inst              403641                       # 
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu0.data            12451604                       # 
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu1.inst               29957                       # 
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu1.data              263929                       # 
Total read bandwidth from this memory (bytes/s)
+system.physmem.num_reads::total                407341                       # 
Number of read requests responded to by this memory
+system.physmem.num_writes::writebacks          120928                       # 
Number of write requests responded to by this memory
+system.physmem.num_writes::total               120928                       # 
Number of write requests responded to by this memory
+system.physmem.bw_read::cpu0.inst              403609                       # 
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu0.data            12451380                       # 
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu1.inst               29924                       # 
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu1.data              263961                       # 
Total read bandwidth from this memory (bytes/s)
 system.physmem.bw_read::tsunami.ide               484                       # 
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::total                13149615                       # 
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::cpu0.inst         403641                       # 
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::cpu1.inst          29957                       # 
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::total             433598                       # 
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_write::writebacks           3903930                       # 
Write bandwidth from this memory (bytes/s)
-system.physmem.bw_write::total                3903930                       # 
Write bandwidth from this memory (bytes/s)
-system.physmem.bw_total::writebacks           3903930                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu0.inst             403641                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu0.data           12451604                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu1.inst              29957                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu1.data             263929                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_read::total                13149359                       # 
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::cpu0.inst         403609                       # 
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::cpu1.inst          29924                       # 
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::total             433533                       # 
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_write::writebacks           3903672                       # 
Write bandwidth from this memory (bytes/s)
+system.physmem.bw_write::total                3903672                       # 
Write bandwidth from this memory (bytes/s)
+system.physmem.bw_total::writebacks           3903672                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu0.inst             403609                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu0.data           12451380                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu1.inst              29924                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu1.data             263961                       # 
Total bandwidth to/from this memory (bytes/s)
 system.physmem.bw_total::tsunami.ide              484                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::total               17053544                       # 
Total bandwidth to/from this memory (bytes/s)
-system.physmem.readReqs                        407349                       # 
Number of read requests accepted
-system.physmem.writeReqs                       120936                       # 
Number of write requests accepted
-system.physmem.readBursts                      407349                       # 
Number of DRAM read bursts, including those serviced by the write queue
-system.physmem.writeBursts                     120936                       # 
Number of DRAM write bursts, including those merged in the write queue
-system.physmem.bytesReadDRAM                 26062656                       # 
Total number of bytes read from DRAM
-system.physmem.bytesReadWrQ                      7680                       # 
Total number of bytes read from write queue
-system.physmem.bytesWritten                   7738112                       # 
Total number of bytes written to DRAM
-system.physmem.bytesReadSys                  26070336                       # 
Total read bytes from the system interface side
-system.physmem.bytesWrittenSys                7739904                       # 
Total written bytes from the system interface side
-system.physmem.servicedByWrQ                      120                       # 
Number of DRAM read bursts serviced by the write queue
+system.physmem.bw_total::total               17053031                       # 
Total bandwidth to/from this memory (bytes/s)
+system.physmem.readReqs                        407341                       # 
Number of read requests accepted
+system.physmem.writeReqs                       120928                       # 
Number of write requests accepted
+system.physmem.readBursts                      407341                       # 
Number of DRAM read bursts, including those serviced by the write queue
+system.physmem.writeBursts                     120928                       # 
Number of DRAM write bursts, including those merged in the write queue
+system.physmem.bytesReadDRAM                 26061824                       # 
Total number of bytes read from DRAM
+system.physmem.bytesReadWrQ                      8000                       # 
Total number of bytes read from write queue
+system.physmem.bytesWritten                   7737600                       # 
Total number of bytes written to DRAM
+system.physmem.bytesReadSys                  26069824                       # 
Total read bytes from the system interface side
+system.physmem.bytesWrittenSys                7739392                       # 
Total written bytes from the system interface side
+system.physmem.servicedByWrQ                      125                       # 
Number of DRAM read bursts serviced by the write queue
 system.physmem.mergedWrBursts                       0                       # 
Number of DRAM write bursts merged with an existing one
 system.physmem.neitherReadNorWriteReqs              0                       # 
Number of requests that are neither read nor write
 system.physmem.perBankRdBursts::0               25226                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::1               25379                       # 
Per bank write bursts
-system.physmem.perBankRdBursts::2               25428                       # 
Per bank write bursts
+system.physmem.perBankRdBursts::2               25423                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::3               24855                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::4               25157                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::5               25423                       # 
Per bank write bursts
-system.physmem.perBankRdBursts::6               25496                       # 
Per bank write bursts
-system.physmem.perBankRdBursts::7               25345                       # 
Per bank write bursts
+system.physmem.perBankRdBursts::6               25497                       # 
Per bank write bursts
+system.physmem.perBankRdBursts::7               25338                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::8               25239                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::9               25589                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::10              25733                       # 
Per bank write bursts
-system.physmem.perBankRdBursts::11              25919                       # 
Per bank write bursts
+system.physmem.perBankRdBursts::11              25917                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::12              25947                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::13              25572                       # 
Per bank write bursts
 system.physmem.perBankRdBursts::14              25277                       # 
Per bank write bursts
@@ -84,34 +84,34 @@
 system.physmem.perBankWrBursts::3                6886                       # 
Per bank write bursts
 system.physmem.perBankWrBursts::4                7104                       # 
Per bank write bursts
 system.physmem.perBankWrBursts::5                7345                       # 
Per bank write bursts
-system.physmem.perBankWrBursts::6                7430                       # 
Per bank write bursts
-system.physmem.perBankWrBursts::7                7151                       # 
Per bank write bursts
+system.physmem.perBankWrBursts::6                7431                       # 
Per bank write bursts
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to