changeset a80884911971 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=a80884911971
description:
cpu: Fix LLSC atomic CPU wakeup
Writes to locked memory addresses (LLSC) did not wake up the locking
CPU. This can lead to deadlocks on multi-core runs. In AtomicSimpleCPU,
recvAtomicSnoop was checking if the incoming packet was an invalidation
(isInvalidate) and only then handled a locked snoop. But, writes are
seen instead of invalidates when running without caches (fast-forward
configurations). As a simple fix, handleLockedSnoop is now also called
when the incoming snoop packet is a write.
diffstat:
src/cpu/minor/lsq.cc
| 4 +-
src/cpu/o3/lsq_unit_impl.hh
| 6 +-
src/cpu/simple/atomic.cc
| 5 +-
src/cpu/simple/timing.cc
| 10 +-
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
| 8 +-
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
| 8 +-
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
| 2133 ++--
tests/quick/fs/10.linux-boot/ref/arm/linux/realview-simple-timing-dual/stats.txt
| 4390 ++++-----
8 files changed, 3272 insertions(+), 3292 deletions(-)
diffs (truncated from 7668 to 300 lines):
diff -r 46c7b3e35720 -r a80884911971 src/cpu/minor/lsq.cc
--- a/src/cpu/minor/lsq.cc Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/minor/lsq.cc Sun Jul 19 15:03:30 2015 -0500
@@ -1617,7 +1617,9 @@
* this action on snoops. */
/* THREAD */
- TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+ if (pkt->isInvalidate() || pkt->isWrite()) {
+ TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+ }
}
}
diff -r 46c7b3e35720 -r a80884911971 src/cpu/o3/lsq_unit_impl.hh
--- a/src/cpu/o3/lsq_unit_impl.hh Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/o3/lsq_unit_impl.hh Sun Jul 19 15:03:30 2015 -0500
@@ -438,10 +438,8 @@
int load_idx = loadHead;
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
- // Unlock the cpu-local monitor when the CPU sees a snoop to a locked
- // address. The CPU can speculatively execute a LL operation after a
pending
- // SC operation in the pipeline and that can make the cache monitor the CPU
- // is connected to valid while it really shouldn't be.
+ // Only Invalidate packet calls checkSnoop
+ assert(pkt->isInvalidate());
for (int x = 0; x < cpu->numContexts(); x++) {
ThreadContext *tc = cpu->getContext(x);
bool no_squash = cpu->thread[x]->noSquashFromTC;
diff -r 46c7b3e35720 -r a80884911971 src/cpu/simple/atomic.cc
--- a/src/cpu/simple/atomic.cc Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/simple/atomic.cc Sun Jul 19 15:03:30 2015 -0500
@@ -292,7 +292,10 @@
}
// if snoop invalidates, release any associated locks
- if (pkt->isInvalidate()) {
+ // When run without caches, Invalidation packets will not be received
+ // hence we must check if the incoming packets are writes and wakeup
+ // the processor accordingly
+ if (pkt->isInvalidate() || pkt->isWrite()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
pkt->getAddr());
for (auto &t_info : cpu->threadInfo) {
diff -r 46c7b3e35720 -r a80884911971 src/cpu/simple/timing.cc
--- a/src/cpu/simple/timing.cc Mon Feb 29 19:13:13 2016 -0600
+++ b/src/cpu/simple/timing.cc Sun Jul 19 15:03:30 2015 -0500
@@ -876,8 +876,14 @@
}
}
- for (auto &t_info : cpu->threadInfo) {
- TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+ // Making it uniform across all CPUs:
+ // The CPUs need to be woken up only on an invalidation packet (when using
caches)
+ // or on an incoming write packet (when not using caches)
+ // It is not necessary to wake up the processor on all incoming packets
+ if (pkt->isInvalidate() || pkt->isWrite()) {
+ for (auto &t_info : cpu->threadInfo) {
+ TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+ }
}
}
diff -r 46c7b3e35720 -r a80884911971
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
---
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
Mon Feb 29 19:13:13 2016 -0600
+++
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini
Sun Jul 19 15:03:30 2015 -0500
@@ -104,7 +104,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=true
hit_latency=2
is_read_only=false
max_miss_count=0
@@ -146,7 +145,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=true
hit_latency=2
is_read_only=true
max_miss_count=0
@@ -234,7 +232,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=true
hit_latency=2
is_read_only=false
max_miss_count=0
@@ -276,7 +273,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=true
hit_latency=2
is_read_only=true
max_miss_count=0
@@ -410,7 +406,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=false
hit_latency=50
is_read_only=false
max_miss_count=0
@@ -447,7 +442,6 @@
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
-forward_snoops=true
hit_latency=20
is_read_only=false
max_miss_count=0
@@ -482,6 +476,7 @@
eventq_index=0
forward_latency=4
frontend_latency=3
+point_of_coherency=true
response_latency=2
snoop_filter=Null
snoop_response_latency=4
@@ -615,6 +610,7 @@
eventq_index=0
forward_latency=0
frontend_latency=1
+point_of_coherency=false
response_latency=1
snoop_filter=system.toL2Bus.snoop_filter
snoop_response_latency=1
diff -r 46c7b3e35720 -r a80884911971
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
---
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
Mon Feb 29 19:13:13 2016 -0600
+++
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/simout
Sun Jul 19 15:03:30 2015 -0500
@@ -1,9 +1,9 @@
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
-gem5 compiled Jan 21 2016 13:49:21
-gem5 started Jan 21 2016 13:50:00
-gem5 executing on zizzer, pid 33973
+gem5 compiled Feb 29 2016 18:59:12
+gem5 started Feb 29 2016 18:59:20
+gem5 executing on redacted.arm.com, pid 18325
command line: build/ALPHA/gem5.opt -d
build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
-re /z/atgutier/gem5/gem5-commit/tests/run.py
build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
Global frequency set at 1000000000000 ticks per second
@@ -11,4 +11,4 @@
0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009
info: Entering event queue @ 0. Starting simulation...
info: Launching CPU 1 @ 881785000
-Exiting @ tick 1982594146000 because m5_exit instruction encountered
+Exiting @ tick 1982592736000 because m5_exit instruction encountered
diff -r 46c7b3e35720 -r a80884911971
tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
---
a/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
Mon Feb 29 19:13:13 2016 -0600
+++
b/tests/quick/fs/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stats.txt
Sun Jul 19 15:03:30 2015 -0500
@@ -1,79 +1,79 @@
---------- Begin Simulation Statistics ----------
sim_seconds 1.982593 #
Number of seconds simulated
-sim_ticks 1982593132000 #
Number of ticks simulated
-final_tick 1982593132000 #
Number of ticks from beginning of simulation (restored from checkpoints and
never reset)
+sim_ticks 1982592736000 #
Number of ticks simulated
+final_tick 1982592736000 #
Number of ticks from beginning of simulation (restored from checkpoints and
never reset)
sim_freq 1000000000000 #
Frequency of simulated ticks
-host_inst_rate 1109655 #
Simulator instruction rate (inst/s)
-host_op_rate 1109654 #
Simulator op (including micro ops) rate (op/s)
-host_tick_rate 36063876778 #
Simulator tick rate (ticks/s)
-host_mem_usage 333984 #
Number of bytes of host memory used
-host_seconds 54.97 #
Real time elapsed on the host
-sim_insts 61002651 #
Number of instructions simulated
-sim_ops 61002651 #
Number of ops (including micro ops) simulated
+host_inst_rate 753764 #
Simulator instruction rate (inst/s)
+host_op_rate 753764 #
Simulator op (including micro ops) rate (op/s)
+host_tick_rate 24497172234 #
Simulator tick rate (ticks/s)
+host_mem_usage 320072 #
Number of bytes of host memory used
+host_seconds 80.93 #
Real time elapsed on the host
+sim_insts 61003209 #
Number of instructions simulated
+sim_ops 61003209 #
Number of ops (including micro ops) simulated
system.voltage_domain.voltage 1 #
Voltage in Volts
system.clk_domain.clock 1000 #
Clock period in ticks
-system.physmem.bytes_read::cpu0.inst 800256 #
Number of bytes read from this memory
-system.physmem.bytes_read::cpu0.data 24686464 #
Number of bytes read from this memory
-system.physmem.bytes_read::cpu1.inst 59392 #
Number of bytes read from this memory
-system.physmem.bytes_read::cpu1.data 523264 #
Number of bytes read from this memory
+system.physmem.bytes_read::cpu0.inst 800192 #
Number of bytes read from this memory
+system.physmem.bytes_read::cpu0.data 24686016 #
Number of bytes read from this memory
+system.physmem.bytes_read::cpu1.inst 59328 #
Number of bytes read from this memory
+system.physmem.bytes_read::cpu1.data 523328 #
Number of bytes read from this memory
system.physmem.bytes_read::tsunami.ide 960 #
Number of bytes read from this memory
-system.physmem.bytes_read::total 26070336 #
Number of bytes read from this memory
-system.physmem.bytes_inst_read::cpu0.inst 800256 #
Number of instructions bytes read from this memory
-system.physmem.bytes_inst_read::cpu1.inst 59392 #
Number of instructions bytes read from this memory
-system.physmem.bytes_inst_read::total 859648 #
Number of instructions bytes read from this memory
-system.physmem.bytes_written::writebacks 7739904 #
Number of bytes written to this memory
-system.physmem.bytes_written::total 7739904 #
Number of bytes written to this memory
-system.physmem.num_reads::cpu0.inst 12504 #
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu0.data 385726 #
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu1.inst 928 #
Number of read requests responded to by this memory
-system.physmem.num_reads::cpu1.data 8176 #
Number of read requests responded to by this memory
+system.physmem.bytes_read::total 26069824 #
Number of bytes read from this memory
+system.physmem.bytes_inst_read::cpu0.inst 800192 #
Number of instructions bytes read from this memory
+system.physmem.bytes_inst_read::cpu1.inst 59328 #
Number of instructions bytes read from this memory
+system.physmem.bytes_inst_read::total 859520 #
Number of instructions bytes read from this memory
+system.physmem.bytes_written::writebacks 7739392 #
Number of bytes written to this memory
+system.physmem.bytes_written::total 7739392 #
Number of bytes written to this memory
+system.physmem.num_reads::cpu0.inst 12503 #
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu0.data 385719 #
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu1.inst 927 #
Number of read requests responded to by this memory
+system.physmem.num_reads::cpu1.data 8177 #
Number of read requests responded to by this memory
system.physmem.num_reads::tsunami.ide 15 #
Number of read requests responded to by this memory
-system.physmem.num_reads::total 407349 #
Number of read requests responded to by this memory
-system.physmem.num_writes::writebacks 120936 #
Number of write requests responded to by this memory
-system.physmem.num_writes::total 120936 #
Number of write requests responded to by this memory
-system.physmem.bw_read::cpu0.inst 403641 #
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu0.data 12451604 #
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu1.inst 29957 #
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::cpu1.data 263929 #
Total read bandwidth from this memory (bytes/s)
+system.physmem.num_reads::total 407341 #
Number of read requests responded to by this memory
+system.physmem.num_writes::writebacks 120928 #
Number of write requests responded to by this memory
+system.physmem.num_writes::total 120928 #
Number of write requests responded to by this memory
+system.physmem.bw_read::cpu0.inst 403609 #
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu0.data 12451380 #
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu1.inst 29924 #
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_read::cpu1.data 263961 #
Total read bandwidth from this memory (bytes/s)
system.physmem.bw_read::tsunami.ide 484 #
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_read::total 13149615 #
Total read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::cpu0.inst 403641 #
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::cpu1.inst 29957 #
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_inst_read::total 433598 #
Instruction read bandwidth from this memory (bytes/s)
-system.physmem.bw_write::writebacks 3903930 #
Write bandwidth from this memory (bytes/s)
-system.physmem.bw_write::total 3903930 #
Write bandwidth from this memory (bytes/s)
-system.physmem.bw_total::writebacks 3903930 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu0.inst 403641 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu0.data 12451604 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu1.inst 29957 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::cpu1.data 263929 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_read::total 13149359 #
Total read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::cpu0.inst 403609 #
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::cpu1.inst 29924 #
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_inst_read::total 433533 #
Instruction read bandwidth from this memory (bytes/s)
+system.physmem.bw_write::writebacks 3903672 #
Write bandwidth from this memory (bytes/s)
+system.physmem.bw_write::total 3903672 #
Write bandwidth from this memory (bytes/s)
+system.physmem.bw_total::writebacks 3903672 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu0.inst 403609 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu0.data 12451380 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu1.inst 29924 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.bw_total::cpu1.data 263961 #
Total bandwidth to/from this memory (bytes/s)
system.physmem.bw_total::tsunami.ide 484 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.bw_total::total 17053544 #
Total bandwidth to/from this memory (bytes/s)
-system.physmem.readReqs 407349 #
Number of read requests accepted
-system.physmem.writeReqs 120936 #
Number of write requests accepted
-system.physmem.readBursts 407349 #
Number of DRAM read bursts, including those serviced by the write queue
-system.physmem.writeBursts 120936 #
Number of DRAM write bursts, including those merged in the write queue
-system.physmem.bytesReadDRAM 26062656 #
Total number of bytes read from DRAM
-system.physmem.bytesReadWrQ 7680 #
Total number of bytes read from write queue
-system.physmem.bytesWritten 7738112 #
Total number of bytes written to DRAM
-system.physmem.bytesReadSys 26070336 #
Total read bytes from the system interface side
-system.physmem.bytesWrittenSys 7739904 #
Total written bytes from the system interface side
-system.physmem.servicedByWrQ 120 #
Number of DRAM read bursts serviced by the write queue
+system.physmem.bw_total::total 17053031 #
Total bandwidth to/from this memory (bytes/s)
+system.physmem.readReqs 407341 #
Number of read requests accepted
+system.physmem.writeReqs 120928 #
Number of write requests accepted
+system.physmem.readBursts 407341 #
Number of DRAM read bursts, including those serviced by the write queue
+system.physmem.writeBursts 120928 #
Number of DRAM write bursts, including those merged in the write queue
+system.physmem.bytesReadDRAM 26061824 #
Total number of bytes read from DRAM
+system.physmem.bytesReadWrQ 8000 #
Total number of bytes read from write queue
+system.physmem.bytesWritten 7737600 #
Total number of bytes written to DRAM
+system.physmem.bytesReadSys 26069824 #
Total read bytes from the system interface side
+system.physmem.bytesWrittenSys 7739392 #
Total written bytes from the system interface side
+system.physmem.servicedByWrQ 125 #
Number of DRAM read bursts serviced by the write queue
system.physmem.mergedWrBursts 0 #
Number of DRAM write bursts merged with an existing one
system.physmem.neitherReadNorWriteReqs 0 #
Number of requests that are neither read nor write
system.physmem.perBankRdBursts::0 25226 #
Per bank write bursts
system.physmem.perBankRdBursts::1 25379 #
Per bank write bursts
-system.physmem.perBankRdBursts::2 25428 #
Per bank write bursts
+system.physmem.perBankRdBursts::2 25423 #
Per bank write bursts
system.physmem.perBankRdBursts::3 24855 #
Per bank write bursts
system.physmem.perBankRdBursts::4 25157 #
Per bank write bursts
system.physmem.perBankRdBursts::5 25423 #
Per bank write bursts
-system.physmem.perBankRdBursts::6 25496 #
Per bank write bursts
-system.physmem.perBankRdBursts::7 25345 #
Per bank write bursts
+system.physmem.perBankRdBursts::6 25497 #
Per bank write bursts
+system.physmem.perBankRdBursts::7 25338 #
Per bank write bursts
system.physmem.perBankRdBursts::8 25239 #
Per bank write bursts
system.physmem.perBankRdBursts::9 25589 #
Per bank write bursts
system.physmem.perBankRdBursts::10 25733 #
Per bank write bursts
-system.physmem.perBankRdBursts::11 25919 #
Per bank write bursts
+system.physmem.perBankRdBursts::11 25917 #
Per bank write bursts
system.physmem.perBankRdBursts::12 25947 #
Per bank write bursts
system.physmem.perBankRdBursts::13 25572 #
Per bank write bursts
system.physmem.perBankRdBursts::14 25277 #
Per bank write bursts
@@ -84,34 +84,34 @@
system.physmem.perBankWrBursts::3 6886 #
Per bank write bursts
system.physmem.perBankWrBursts::4 7104 #
Per bank write bursts
system.physmem.perBankWrBursts::5 7345 #
Per bank write bursts
-system.physmem.perBankWrBursts::6 7430 #
Per bank write bursts
-system.physmem.perBankWrBursts::7 7151 #
Per bank write bursts
+system.physmem.perBankWrBursts::6 7431 #
Per bank write bursts
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev