changeset 1daf51f62013 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=1daf51f62013
description:
        O3: Enhance data address translation by supporting hardware page table 
walkers.

        Some ISAs (like ARM) rely on hardware page table walkers.  For those 
ISAs,
        when a TLB miss occurs, initiateTranslation() can return with NoFault 
but with
        the translation unfinished.

        Instructions experiencing a delayed translation due to a hardware page 
table
        walk are deferred until the translation completes and kept in the IQ. 
 In
        order to keep track of them, the IQ has been augmented with a queue of 
the
        outstanding delayed memory instructions.  When their translation 
completes,
        instructions are re-executed (only their initiateAccess() was already
        executed; their DTB translation is now skipped).  The IEW stage has been
        modified to support such a 2-pass execution.

diffstat:

 src/arch/arm/tlb.cc           |    2 +
 src/cpu/base_dyn_inst.hh      |  125 ++++++++++++++++++++++++++++++++---------
 src/cpu/base_dyn_inst_impl.hh |   15 +++++
 src/cpu/o3/fetch.hh           |    4 +
 src/cpu/o3/iew_impl.hh        |   21 +++++++
 src/cpu/o3/inst_queue.hh      |   28 +++++++++
 src/cpu/o3/inst_queue_impl.hh |   53 +++++++++++++++++-
 src/cpu/o3/lsq_unit_impl.hh   |   10 +++-
 src/cpu/simple/timing.hh      |    4 +
 src/cpu/translation.hh        |   32 +++++++++-
 src/sim/tlb.hh                |   18 ++++++
 11 files changed, 276 insertions(+), 36 deletions(-)

diffs (truncated from 593 to 300 lines):

diff -r 02f63121a9a1 -r 1daf51f62013 src/arch/arm/tlb.cc
--- a/src/arch/arm/tlb.cc       Fri Feb 11 18:29:35 2011 -0600
+++ b/src/arch/arm/tlb.cc       Fri Feb 11 18:29:35 2011 -0600
@@ -696,6 +696,8 @@
 #endif
     if (!delay)
         translation->finish(fault, req, tc, mode);
+    else
+        translation->markDelayed();
     return fault;
 }
 
diff -r 02f63121a9a1 -r 1daf51f62013 src/cpu/base_dyn_inst.hh
--- a/src/cpu/base_dyn_inst.hh  Fri Feb 11 18:29:35 2011 -0600
+++ b/src/cpu/base_dyn_inst.hh  Fri Feb 11 18:29:35 2011 -0600
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * Copyright (c) 2009 The University of Edinburgh
  * All rights reserved.
@@ -150,6 +162,29 @@
     /** Finish a DTB address translation. */
     void finishTranslation(WholeTranslationState *state);
 
+    /** True if the DTB address translation has started. */
+    bool translationStarted;
+
+    /** True if the DTB address translation has completed. */
+    bool translationCompleted;
+
+    /**
+     * Returns true if the DTB address translation is being delayed due to a hw
+     * page table walk.
+     */
+    bool isTranslationDelayed() const
+    {
+        return (translationStarted && !translationCompleted);
+    }
+
+    /**
+     * Saved memory requests (needed when the DTB address translation is
+     * delayed due to a hw page table walk).
+     */
+    RequestPtr savedReq;
+    RequestPtr savedSreqLow;
+    RequestPtr savedSreqHigh;
+
     /** @todo: Consider making this private. */
   public:
     /** The sequence number of the instruction. */
@@ -835,33 +870,42 @@
                              unsigned size, unsigned flags)
 {
     reqMade = true;
-    Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(),
-                               thread->contextId(), threadNumber);
-
+    Request *req = NULL;
     Request *sreqLow = NULL;
     Request *sreqHigh = NULL;
 
-    // Only split the request if the ISA supports unaligned accesses.
-    if (TheISA::HasUnalignedMemAcc) {
-        splitRequest(req, sreqLow, sreqHigh);
-    }
-    initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
+    if (reqMade && translationStarted) {
+        req = savedReq;
+        sreqLow = savedSreqLow;
+        sreqHigh = savedSreqHigh;
+    } else {
+        req = new Request(asid, addr, size, flags, this->pc.instAddr(),
+                          thread->contextId(), threadNumber);
 
-    if (fault == NoFault) {
-        effAddr = req->getVaddr();
-        effAddrValid = true;
-        fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
-    } else {
-        // Commit will have to clean up whatever happened.  Set this
-        // instruction as executed.
-        this->setExecuted();
+        // Only split the request if the ISA supports unaligned accesses.
+        if (TheISA::HasUnalignedMemAcc) {
+            splitRequest(req, sreqLow, sreqHigh);
+        }
+        initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
     }
 
-    if (fault != NoFault) {
-        // Return a fixed value to keep simulation deterministic even
-        // along misspeculated paths.
-        if (data)
-            bzero(data, size);
+    if (translationCompleted) {
+        if (fault == NoFault) {
+            effAddr = req->getVaddr();
+            effAddrValid = true;
+            fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
+        } else {
+            // Commit will have to clean up whatever happened.  Set this
+            // instruction as executed.
+            this->setExecuted();
+        }
+
+        if (fault != NoFault) {
+            // Return a fixed value to keep simulation deterministic even
+            // along misspeculated paths.
+            if (data)
+                bzero(data, size);
+        }
     }
 
     if (traceData) {
@@ -897,19 +941,26 @@
     }
 
     reqMade = true;
-    Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(),
-                               thread->contextId(), threadNumber);
-
+    Request *req = NULL;
     Request *sreqLow = NULL;
     Request *sreqHigh = NULL;
 
-    // Only split the request if the ISA supports unaligned accesses.
-    if (TheISA::HasUnalignedMemAcc) {
-        splitRequest(req, sreqLow, sreqHigh);
+    if (reqMade && translationStarted) {
+        req = savedReq;
+        sreqLow = savedSreqLow;
+        sreqHigh = savedSreqHigh;
+    } else {
+        req = new Request(asid, addr, size, flags, this->pc.instAddr(),
+                          thread->contextId(), threadNumber);
+
+        // Only split the request if the ISA supports unaligned accesses.
+        if (TheISA::HasUnalignedMemAcc) {
+            splitRequest(req, sreqLow, sreqHigh);
+        }
+        initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
     }
-    initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
 
-    if (fault == NoFault) {
+    if (fault == NoFault && translationCompleted) {
         effAddr = req->getVaddr();
         effAddrValid = true;
         fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
@@ -953,6 +1004,8 @@
                                        RequestPtr sreqHigh, uint64_t *res,
                                        BaseTLB::Mode mode)
 {
+    translationStarted = true;
+
     if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
         WholeTranslationState *state =
             new WholeTranslationState(req, NULL, res, mode);
@@ -961,6 +1014,12 @@
         DataTranslation<BaseDynInst<Impl> > *trans =
             new DataTranslation<BaseDynInst<Impl> >(this, state);
         cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+        if (!translationCompleted) {
+            // Save memory requests.
+            savedReq = state->mainReq;
+            savedSreqLow = state->sreqLow;
+            savedSreqHigh = state->sreqHigh;
+        }
     } else {
         WholeTranslationState *state =
             new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
@@ -973,6 +1032,12 @@
 
         cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
         cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
+        if (!translationCompleted) {
+            // Save memory requests.
+            savedReq = state->mainReq;
+            savedSreqLow = state->sreqLow;
+            savedSreqHigh = state->sreqHigh;
+        }
     }
 }
 
@@ -998,6 +1063,8 @@
         state->deleteReqs();
     }
     delete state;
+
+    translationCompleted = true;
 }
 
 #endif // __CPU_BASE_DYN_INST_HH__
diff -r 02f63121a9a1 -r 1daf51f62013 src/cpu/base_dyn_inst_impl.hh
--- a/src/cpu/base_dyn_inst_impl.hh     Fri Feb 11 18:29:35 2011 -0600
+++ b/src/cpu/base_dyn_inst_impl.hh     Fri Feb 11 18:29:35 2011 -0600
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -107,6 +119,9 @@
     effAddrValid = false;
     physEffAddr = 0;
 
+    translationStarted = false;
+    translationCompleted = false;
+
     isUncacheable = false;
     reqMade = false;
     readyRegs = 0;
diff -r 02f63121a9a1 -r 1daf51f62013 src/cpu/o3/fetch.hh
--- a/src/cpu/o3/fetch.hh       Fri Feb 11 18:29:35 2011 -0600
+++ b/src/cpu/o3/fetch.hh       Fri Feb 11 18:29:35 2011 -0600
@@ -137,6 +137,10 @@
         {}
 
         void
+        markDelayed()
+        {}
+
+        void
         finish(Fault fault, RequestPtr req, ThreadContext *tc,
                BaseTLB::Mode mode)
         {
diff -r 02f63121a9a1 -r 1daf51f62013 src/cpu/o3/iew_impl.hh
--- a/src/cpu/o3/iew_impl.hh    Fri Feb 11 18:29:35 2011 -0600
+++ b/src/cpu/o3/iew_impl.hh    Fri Feb 11 18:29:35 2011 -0600
@@ -1241,12 +1241,33 @@
                 // Loads will mark themselves as executed, and their writeback
                 // event adds the instruction to the queue to commit
                 fault = ldstQueue.executeLoad(inst);
+
+                if (inst->isTranslationDelayed() &&
+                    fault == NoFault) {
+                    // A hw page table walk is currently going on; the
+                    // instruction must be deferred.
+                    DPRINTF(IEW, "Execute: Delayed translation, deferring "
+                            "load.\n");
+                    instQueue.deferMemInst(inst);
+                    continue;
+                }
+
                 if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
                     fault = NoFault;
                 }
             } else if (inst->isStore()) {
                 fault = ldstQueue.executeStore(inst);
 
+                if (inst->isTranslationDelayed() &&
+                    fault == NoFault) {
+                    // A hw page table walk is currently going on; the
+                    // instruction must be deferred.
+                    DPRINTF(IEW, "Execute: Delayed translation, deferring "
+                            "store.\n");
+                    instQueue.deferMemInst(inst);
+                    continue;
+                }
+
                 // If the store had a fault then it may not have a mem req
                 if (fault != NoFault || inst->readPredicate() == false ||
                         !inst->isStoreConditional()) {
diff -r 02f63121a9a1 -r 1daf51f62013 src/cpu/o3/inst_queue.hh
--- a/src/cpu/o3/inst_queue.hh  Fri Feb 11 18:29:35 2011 -0600
+++ b/src/cpu/o3/inst_queue.hh  Fri Feb 11 18:29:35 2011 -0600
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to