Hi Lisa,

I'm sorry to say that this was a problem with my patches. I'm still unsure why all of the regressions passed for me - I looked over my logs and they were all fine. However, when I ran them again just now then I got the same problem as you. Looking into it, I had simply forgotten to propagate a fault back to the processor after a read or write.

This patch here should fix it all up again. I'm running all of the regressions again. I'll push the patch on Monday if they are all ok.

Sorry for the problem again.

Tim

On Fri, 19 Feb 2010 16:35:34 -0500, Lisa Hsu <[email protected]> wrote:

Timothy,

Did you check this against ALPHA_FS/test/fast/long/10.linux-boot tests under the O3 configurations? I believe this changeset may have broken them. I am
getting ready to make a push, and I passed ALPHA_FS linux boot regression
tests a few days ago, but since pulling the last 6 changesets, I've begun
failing on this test, and this O3 change seems to be the most obvious
breaker. What happens is that it seems like no forward progress is made in the program and it just gets hung and runs forever. This happens on a clean tree after having removed my patches that I want to push. Can you look into
it?

Thanks,
Lisa

On Fri, Feb 12, 2010 at 12:52 PM, Timothy M. Jones <[email protected]>wrote:

changeset 4d4903a3e7c5 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=4d4903a3e7c5
description:
       O3PCU: Split loads and stores that cross cache line boundaries.

When each load or store is sent to the LSQ, we check whether it will
cross a
       cache line boundary and, if so, split it in two. This creates two
TLB
       translations and two memory requests. Care has to be taken if the
first
       packet of a split load is sent but the second blocks the cache.
Similarly,
for a store, if the first packet cannot be sent, we must store the
second
       one somewhere to retry later.

This modifies the LSQSenderState class to record both packets in a
split
       load or store.

Finally, a new const variable, HasUnalignedMemAcc, is added to each
ISA
to indicate whether unaligned memory accesses are allowed. This is
used
throughout the changed code so that compiler can optimise away code
dealing
       with split requests for ISAs that don't need them.

diffstat:

11 files changed, 378 insertions(+), 53 deletions(-)
src/arch/alpha/isa_traits.hh |    3
src/arch/arm/isa_traits.hh   |    3
src/arch/mips/isa_traits.hh  |    3
src/arch/power/isa_traits.hh |    3
src/arch/sparc/isa_traits.hh |    3
src/arch/x86/isa_traits.hh   |    3
src/cpu/base_dyn_inst.hh     |   75 ++++++++++++++++---
src/cpu/o3/cpu.hh            |   15 ++-
src/cpu/o3/lsq.hh            |   22 +++--
src/cpu/o3/lsq_unit.hh       |  138 +++++++++++++++++++++++++++++++----
src/cpu/o3/lsq_unit_impl.hh  |  163
++++++++++++++++++++++++++++++++++++++----

diffs (truncated from 831 to 300 lines):

diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/alpha/isa_traits.hh
--- a/src/arch/alpha/isa_traits.hh      Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/alpha/isa_traits.hh      Fri Feb 12 19:53:20 2010 +0000
@@ -131,6 +131,9 @@
 // Alpha UNOP (ldq_u r31,0(r0))
 const ExtMachInst NoopMachInst = 0x2ffe0000;

+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
+
 } // namespace AlphaISA

 #endif // __ARCH_ALPHA_ISA_TRAITS_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/arm/isa_traits.hh
--- a/src/arch/arm/isa_traits.hh        Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/arm/isa_traits.hh        Fri Feb 12 19:53:20 2010 +0000
@@ -106,6 +106,9 @@
    const int ByteBytes = 1;

    const uint32_t HighVecs = 0xFFFF0000;
+
+    // Memory accesses cannot be unaligned
+    const bool HasUnalignedMemAcc = false;
 };

 using namespace ArmISA;
diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/mips/isa_traits.hh
--- a/src/arch/mips/isa_traits.hh       Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/mips/isa_traits.hh       Fri Feb 12 19:53:20 2010 +0000
@@ -164,6 +164,9 @@
 const int ANNOTE_NONE = 0;
 const uint32_t ITOUCH_ANNOTE = 0xffffffff;

+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
+
 };

 #endif // __ARCH_MIPS_ISA_TRAITS_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/power/isa_traits.hh
--- a/src/arch/power/isa_traits.hh      Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/power/isa_traits.hh      Fri Feb 12 19:53:20 2010 +0000
@@ -70,6 +70,9 @@
 // This is ori 0, 0, 0
 const ExtMachInst NoopMachInst = 0x60000000;

+// Memory accesses can be unaligned
+const bool HasUnalignedMemAcc = true;
+
 } // PowerISA namespace

 #endif // __ARCH_POWER_ISA_TRAITS_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/sparc/isa_traits.hh
--- a/src/arch/sparc/isa_traits.hh      Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/sparc/isa_traits.hh      Fri Feb 12 19:53:20 2010 +0000
@@ -98,6 +98,9 @@
    };

 #endif
+
+// Memory accesses cannot be unaligned
+const bool HasUnalignedMemAcc = false;
 }

 #endif // __ARCH_SPARC_ISA_TRAITS_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/x86/isa_traits.hh
--- a/src/arch/x86/isa_traits.hh        Fri Feb 12 19:53:19 2010 +0000
+++ b/src/arch/x86/isa_traits.hh        Fri Feb 12 19:53:20 2010 +0000
@@ -91,6 +91,9 @@
    StaticInstPtr decodeInst(ExtMachInst);

    const Addr LoadAddrMask = ULL(-1);
+
+    // Memory accesses can be unaligned
+    const bool HasUnalignedMemAcc = true;
 };

 #endif // __ARCH_X86_ISATRAITS_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/base_dyn_inst.hh
--- a/src/cpu/base_dyn_inst.hh  Fri Feb 12 19:53:19 2010 +0000
+++ b/src/cpu/base_dyn_inst.hh  Fri Feb 12 19:53:20 2010 +0000
@@ -131,8 +131,13 @@
    template <class T>
    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);

+    /** Splits a request in two if it crosses a dcache block. */
+    void splitRequest(RequestPtr req, RequestPtr &sreqLow,
+                      RequestPtr &sreqHigh);
+
    /** Initiate a DTB address translation. */
-    void initiateTranslation(RequestPtr req, uint64_t *res,
+    void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+                             RequestPtr sreqHigh, uint64_t *res,
                             BaseTLB::Mode mode);

    /** Finish a DTB address translation. */
@@ -870,12 +875,19 @@
    Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
                               thread->contextId(), threadNumber);

-    initiateTranslation(req, NULL, BaseTLB::Read);
+    Request *sreqLow = NULL;
+    Request *sreqHigh = NULL;
+
+    // Only split the request if the ISA supports unaligned accesses.
+    if (TheISA::HasUnalignedMemAcc) {
+        splitRequest(req, sreqLow, sreqHigh);
+    }
+    initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);

    if (fault == NoFault) {
        effAddr = req->getVaddr();
        effAddrValid = true;
-        cpu->read(req, data, lqIdx);
+        cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
    } else {

        // Return a fixed value to keep simulation deterministic even
@@ -909,12 +921,19 @@
    Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
                               thread->contextId(), threadNumber);

-    initiateTranslation(req, res, BaseTLB::Write);
+    Request *sreqLow = NULL;
+    Request *sreqHigh = NULL;
+
+    // Only split the request if the ISA supports unaligned accesses.
+    if (TheISA::HasUnalignedMemAcc) {
+        splitRequest(req, sreqLow, sreqHigh);
+    }
+    initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);

    if (fault == NoFault) {
        effAddr = req->getVaddr();
        effAddrValid = true;
-        cpu->write(req, data, sqIdx);
+        cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
    }

    return fault;
@@ -922,14 +941,48 @@

 template<class Impl>
 inline void
-BaseDynInst<Impl>::initiateTranslation(RequestPtr req, uint64_t *res,
+BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
+                                RequestPtr &sreqHigh)
+{
+ // Check to see if the request crosses the next level block boundary.
+    unsigned block_size = cpu->getDcachePort()->peerBlockSize();
+    Addr addr = req->getVaddr();
+    Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
+    assert(split_addr <= addr || split_addr - addr < block_size);
+
+    // Spans two blocks.
+    if (split_addr > addr) {
+        req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
+    }
+}
+
+template<class Impl>
+inline void
+BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow, + RequestPtr sreqHigh, uint64_t *res,
                                       BaseTLB::Mode mode)
 {
-    WholeTranslationState *state =
-        new WholeTranslationState(req, NULL, res, mode);
-    DataTranslation<BaseDynInst<Impl> > *trans =
-        new DataTranslation<BaseDynInst<Impl> >(this, state);
-    cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+    if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
+        WholeTranslationState *state =
+            new WholeTranslationState(req, NULL, res, mode);
+
+        // One translation if the request isn't split.
+        DataTranslation<BaseDynInst<Impl> > *trans =
+            new DataTranslation<BaseDynInst<Impl> >(this, state);
+        cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+    } else {
+        WholeTranslationState *state =
+ new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res,
mode);
+
+        // Two translations when the request is split.
+        DataTranslation<BaseDynInst<Impl> > *stransLow =
+            new DataTranslation<BaseDynInst<Impl> >(this, state, 0);
+        DataTranslation<BaseDynInst<Impl> > *stransHigh =
+            new DataTranslation<BaseDynInst<Impl> >(this, state, 1);
+
+        cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow,
mode);
+ cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh,
mode);
+    }
 }

 template<class Impl>
diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/cpu.hh
--- a/src/cpu/o3/cpu.hh Fri Feb 12 19:53:19 2010 +0000
+++ b/src/cpu/o3/cpu.hh Fri Feb 12 19:53:20 2010 +0000
@@ -703,18 +703,25 @@

    /** CPU read function, forwards read to LSQ. */
    template <class T>
-    Fault read(RequestPtr &req, T &data, int load_idx)
+ Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
+               T &data, int load_idx)
    {
-        return this->iew.ldstQueue.read(req, data, load_idx);
+        return this->iew.ldstQueue.read(req, sreqLow, sreqHigh,
+                                        data, load_idx);
    }

    /** CPU write function, forwards write to LSQ. */
    template <class T>
-    Fault write(RequestPtr &req, T &data, int store_idx)
+    Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr
&sreqHigh,
+                T &data, int store_idx)
    {
-        return this->iew.ldstQueue.write(req, data, store_idx);
+        return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
+                                         data, store_idx);
    }

+ /** Get the dcache port (used to find block size for translations). */ + Port *getDcachePort() { return this->iew.ldstQueue.getDcachePort(); }
+
    Addr lockAddr;

    /** Temporary fix for the lock flag, works in the UP case. */
diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/lsq.hh
--- a/src/cpu/o3/lsq.hh Fri Feb 12 19:53:19 2010 +0000
+++ b/src/cpu/o3/lsq.hh Fri Feb 12 19:53:20 2010 +0000
@@ -270,15 +270,19 @@
    void dumpInsts(ThreadID tid)
    { thread[tid].dumpInsts(); }

-    /** Executes a read operation, using the load specified at the load
index. */
+    /** Executes a read operation, using the load specified at the load
+     * index.
+     */
    template <class T>
-    Fault read(RequestPtr req, T &data, int load_idx);
+    Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+               T &data, int load_idx);

/** Executes a store operation, using the store specified at the store
-     *   index.
+     * index.
     */
    template <class T>
-    Fault write(RequestPtr req, T &data, int store_idx);
+ Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+                T &data, int store_idx);

    /** The CPU pointer. */
    O3CPU *cpu;
@@ -369,21 +373,23 @@
 template <class Impl>
 template <class T>
 Fault
-LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+                T &data, int load_idx)
 {
    ThreadID tid = req->threadId();

-    return thread[tid].read(req, data, load_idx);
+    return thread[tid].read(req, sreqLow, sreqHigh, data, load_idx);
 }

 template <class Impl>
 template <class T>
 Fault
-LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
+                 T &data, int store_idx)
 {
    ThreadID tid = req->threadId();

-    return thread[tid].write(req, data, store_idx);
+    return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx);
 }

 #endif // __CPU_O3_LSQ_HH__
diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/lsq_unit.hh
--- a/src/cpu/o3/lsq_unit.hh    Fri Feb 12 19:53:19 2010 +0000
+++ b/src/cpu/o3/lsq_unit.hh    Fri Feb 12 19:53:20 2010 +0000
@@ -216,12 +216,18 @@
    /** Writes back the instruction, sending it to IEW. */
    void writeback(DynInstPtr &inst, PacketPtr pkt);

+ /** Writes back a store that couldn't be completed the previous cycle.
*/
+    void writebackPendingStore();
+
    /** Handles completing the send of a store to memory. */
    void storePostSend(PacketPtr pkt);

    /** Completes the store at the specified index. */
    void completeStore(int store_idx);

+    /** Attempts to send a store to the cache. */
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev



--
The University of Edinburgh is a charitable body, registered in
Scotland, with registration number SC005336.

Attachment: preserve-read-write-faults.patch
Description: Binary data

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to