Hi Lisa, I'll have a look into it, yes. I did run all the regressions before submitting so there shouldn't have been any problems, but I'll check the results.
Tim On Fri, 19 Feb 2010 16:35:34 -0500, Lisa Hsu <[email protected]> wrote: > Timothy, > > Did you check this against ALPHA_FS/test/fast/long/10.linux-boot tests > under > the O3 configurations? I believe this changeset may have broken them. > I am > getting ready to make a push, and I passed ALPHA_FS linux boot regression > tests a few days ago, but since pulling the last 6 changesets, I've begun > failing on this test, and this O3 change seems to be the most obvious > breaker. What happens is that it seems like no forward progress is made > in > the program and it just gets hung and runs forever. This happens on a > clean > tree after having removed my patches that I want to push. Can you look > into > it? > > Thanks, > Lisa > > On Fri, Feb 12, 2010 at 12:52 PM, Timothy M. Jones > <[email protected]>wrote: > >> changeset 4d4903a3e7c5 in /z/repo/m5 >> details: http://repo.m5sim.org/m5?cmd=changeset;node=4d4903a3e7c5 >> description: >> O3PCU: Split loads and stores that cross cache line boundaries. >> >> When each load or store is sent to the LSQ, we check whether it >> will >> cross a >> cache line boundary and, if so, split it in two. This creates two >> TLB >> translations and two memory requests. Care has to be taken if the >> first >> packet of a split load is sent but the second blocks the cache. >> Similarly, >> for a store, if the first packet cannot be sent, we must store >> the >> second >> one somewhere to retry later. >> >> This modifies the LSQSenderState class to record both packets in >> a >> split >> load or store. >> >> Finally, a new const variable, HasUnalignedMemAcc, is added to >> each >> ISA >> to indicate whether unaligned memory accesses are allowed. This >> is >> used >> throughout the changed code so that compiler can optimise away >> code >> dealing >> with split requests for ISAs that don't need them. >> >> diffstat: >> >> 11 files changed, 378 insertions(+), 53 deletions(-) >> src/arch/alpha/isa_traits.hh | 3 >> src/arch/arm/isa_traits.hh | 3 >> src/arch/mips/isa_traits.hh | 3 >> src/arch/power/isa_traits.hh | 3 >> src/arch/sparc/isa_traits.hh | 3 >> src/arch/x86/isa_traits.hh | 3 >> src/cpu/base_dyn_inst.hh | 75 ++++++++++++++++--- >> src/cpu/o3/cpu.hh | 15 ++- >> src/cpu/o3/lsq.hh | 22 +++-- >> src/cpu/o3/lsq_unit.hh | 138 +++++++++++++++++++++++++++++++---- >> src/cpu/o3/lsq_unit_impl.hh | 163 >> ++++++++++++++++++++++++++++++++++++++---- >> >> diffs (truncated from 831 to 300 lines): >> >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/alpha/isa_traits.hh >> --- a/src/arch/alpha/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/alpha/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -131,6 +131,9 @@ >> // Alpha UNOP (ldq_u r31,0(r0)) >> const ExtMachInst NoopMachInst = 0x2ffe0000; >> >> +// Memory accesses cannot be unaligned >> +const bool HasUnalignedMemAcc = false; >> + >> } // namespace AlphaISA >> >> #endif // __ARCH_ALPHA_ISA_TRAITS_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/arm/isa_traits.hh >> --- a/src/arch/arm/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/arm/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -106,6 +106,9 @@ >> const int ByteBytes = 1; >> >> const uint32_t HighVecs = 0xFFFF0000; >> + >> + // Memory accesses cannot be unaligned >> + const bool HasUnalignedMemAcc = false; >> }; >> >> using namespace ArmISA; >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/mips/isa_traits.hh >> --- a/src/arch/mips/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/mips/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -164,6 +164,9 @@ >> const int ANNOTE_NONE = 0; >> const uint32_t ITOUCH_ANNOTE = 0xffffffff; >> >> +// Memory accesses cannot be unaligned >> +const bool HasUnalignedMemAcc = false; >> + >> }; >> >> #endif // __ARCH_MIPS_ISA_TRAITS_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/power/isa_traits.hh >> --- a/src/arch/power/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/power/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -70,6 +70,9 @@ >> // This is ori 0, 0, 0 >> const ExtMachInst NoopMachInst = 0x60000000; >> >> +// Memory accesses can be unaligned >> +const bool HasUnalignedMemAcc = true; >> + >> } // PowerISA namespace >> >> #endif // __ARCH_POWER_ISA_TRAITS_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/sparc/isa_traits.hh >> --- a/src/arch/sparc/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/sparc/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -98,6 +98,9 @@ >> }; >> >> #endif >> + >> +// Memory accesses cannot be unaligned >> +const bool HasUnalignedMemAcc = false; >> } >> >> #endif // __ARCH_SPARC_ISA_TRAITS_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/arch/x86/isa_traits.hh >> --- a/src/arch/x86/isa_traits.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/arch/x86/isa_traits.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -91,6 +91,9 @@ >> StaticInstPtr decodeInst(ExtMachInst); >> >> const Addr LoadAddrMask = ULL(-1); >> + >> + // Memory accesses can be unaligned >> + const bool HasUnalignedMemAcc = true; >> }; >> >> #endif // __ARCH_X86_ISATRAITS_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/base_dyn_inst.hh >> --- a/src/cpu/base_dyn_inst.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/cpu/base_dyn_inst.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -131,8 +131,13 @@ >> template <class T> >> Fault write(T data, Addr addr, unsigned flags, uint64_t *res); >> >> + /** Splits a request in two if it crosses a dcache block. */ >> + void splitRequest(RequestPtr req, RequestPtr &sreqLow, >> + RequestPtr &sreqHigh); >> + >> /** Initiate a DTB address translation. */ >> - void initiateTranslation(RequestPtr req, uint64_t *res, >> + void initiateTranslation(RequestPtr req, RequestPtr sreqLow, >> + RequestPtr sreqHigh, uint64_t *res, >> BaseTLB::Mode mode); >> >> /** Finish a DTB address translation. */ >> @@ -870,12 +875,19 @@ >> Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, >> thread->contextId(), threadNumber); >> >> - initiateTranslation(req, NULL, BaseTLB::Read); >> + Request *sreqLow = NULL; >> + Request *sreqHigh = NULL; >> + >> + // Only split the request if the ISA supports unaligned accesses. >> + if (TheISA::HasUnalignedMemAcc) { >> + splitRequest(req, sreqLow, sreqHigh); >> + } >> + initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); >> >> if (fault == NoFault) { >> effAddr = req->getVaddr(); >> effAddrValid = true; >> - cpu->read(req, data, lqIdx); >> + cpu->read(req, sreqLow, sreqHigh, data, lqIdx); >> } else { >> >> // Return a fixed value to keep simulation deterministic even >> @@ -909,12 +921,19 @@ >> Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, >> thread->contextId(), threadNumber); >> >> - initiateTranslation(req, res, BaseTLB::Write); >> + Request *sreqLow = NULL; >> + Request *sreqHigh = NULL; >> + >> + // Only split the request if the ISA supports unaligned accesses. >> + if (TheISA::HasUnalignedMemAcc) { >> + splitRequest(req, sreqLow, sreqHigh); >> + } >> + initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); >> >> if (fault == NoFault) { >> effAddr = req->getVaddr(); >> effAddrValid = true; >> - cpu->write(req, data, sqIdx); >> + cpu->write(req, sreqLow, sreqHigh, data, sqIdx); >> } >> >> return fault; >> @@ -922,14 +941,48 @@ >> >> template<class Impl> >> inline void >> -BaseDynInst<Impl>::initiateTranslation(RequestPtr req, uint64_t *res, >> +BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow, >> + RequestPtr &sreqHigh) >> +{ >> + // Check to see if the request crosses the next level block >> boundary. >> + unsigned block_size = cpu->getDcachePort()->peerBlockSize(); >> + Addr addr = req->getVaddr(); >> + Addr split_addr = roundDown(addr + req->getSize() - 1, block_size); >> + assert(split_addr <= addr || split_addr - addr < block_size); >> + >> + // Spans two blocks. >> + if (split_addr > addr) { >> + req->splitOnVaddr(split_addr, sreqLow, sreqHigh); >> + } >> +} >> + >> +template<class Impl> >> +inline void >> +BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr >> sreqLow, >> + RequestPtr sreqHigh, uint64_t >> *res, >> BaseTLB::Mode mode) >> { >> - WholeTranslationState *state = >> - new WholeTranslationState(req, NULL, res, mode); >> - DataTranslation<BaseDynInst<Impl> > *trans = >> - new DataTranslation<BaseDynInst<Impl> >(this, state); >> - cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); >> + if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { >> + WholeTranslationState *state = >> + new WholeTranslationState(req, NULL, res, mode); >> + >> + // One translation if the request isn't split. >> + DataTranslation<BaseDynInst<Impl> > *trans = >> + new DataTranslation<BaseDynInst<Impl> >(this, state); >> + cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); >> + } else { >> + WholeTranslationState *state = >> + new WholeTranslationState(req, sreqLow, sreqHigh, NULL, >> res, >> mode); >> + >> + // Two translations when the request is split. >> + DataTranslation<BaseDynInst<Impl> > *stransLow = >> + new DataTranslation<BaseDynInst<Impl> >(this, state, 0); >> + DataTranslation<BaseDynInst<Impl> > *stransHigh = >> + new DataTranslation<BaseDynInst<Impl> >(this, state, 1); >> + >> + cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, >> mode); >> + cpu->dtb->translateTiming(sreqHigh, thread->getTC(), >> stransHigh, >> mode); >> + } >> } >> >> template<class Impl> >> diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/cpu.hh >> --- a/src/cpu/o3/cpu.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/cpu/o3/cpu.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -703,18 +703,25 @@ >> >> /** CPU read function, forwards read to LSQ. */ >> template <class T> >> - Fault read(RequestPtr &req, T &data, int load_idx) >> + Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr >> &sreqHigh, >> + T &data, int load_idx) >> { >> - return this->iew.ldstQueue.read(req, data, load_idx); >> + return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, >> + data, load_idx); >> } >> >> /** CPU write function, forwards write to LSQ. */ >> template <class T> >> - Fault write(RequestPtr &req, T &data, int store_idx) >> + Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr >> &sreqHigh, >> + T &data, int store_idx) >> { >> - return this->iew.ldstQueue.write(req, data, store_idx); >> + return this->iew.ldstQueue.write(req, sreqLow, sreqHigh, >> + data, store_idx); >> } >> >> + /** Get the dcache port (used to find block size for >> translations). */ >> + Port *getDcachePort() { return >> this->iew.ldstQueue.getDcachePort(); } >> + >> Addr lockAddr; >> >> /** Temporary fix for the lock flag, works in the UP case. */ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/lsq.hh >> --- a/src/cpu/o3/lsq.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/cpu/o3/lsq.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -270,15 +270,19 @@ >> void dumpInsts(ThreadID tid) >> { thread[tid].dumpInsts(); } >> >> - /** Executes a read operation, using the load specified at the load >> index. */ >> + /** Executes a read operation, using the load specified at the load >> + * index. >> + */ >> template <class T> >> - Fault read(RequestPtr req, T &data, int load_idx); >> + Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, >> + T &data, int load_idx); >> >> /** Executes a store operation, using the store specified at the >> store >> - * index. >> + * index. >> */ >> template <class T> >> - Fault write(RequestPtr req, T &data, int store_idx); >> + Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr >> sreqHigh, >> + T &data, int store_idx); >> >> /** The CPU pointer. */ >> O3CPU *cpu; >> @@ -369,21 +373,23 @@ >> template <class Impl> >> template <class T> >> Fault >> -LSQ<Impl>::read(RequestPtr req, T &data, int load_idx) >> +LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr >> sreqHigh, >> + T &data, int load_idx) >> { >> ThreadID tid = req->threadId(); >> >> - return thread[tid].read(req, data, load_idx); >> + return thread[tid].read(req, sreqLow, sreqHigh, data, load_idx); >> } >> >> template <class Impl> >> template <class T> >> Fault >> -LSQ<Impl>::write(RequestPtr req, T &data, int store_idx) >> +LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr >> sreqHigh, >> + T &data, int store_idx) >> { >> ThreadID tid = req->threadId(); >> >> - return thread[tid].write(req, data, store_idx); >> + return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx); >> } >> >> #endif // __CPU_O3_LSQ_HH__ >> diff -r a123bd350935 -r 4d4903a3e7c5 src/cpu/o3/lsq_unit.hh >> --- a/src/cpu/o3/lsq_unit.hh Fri Feb 12 19:53:19 2010 +0000 >> +++ b/src/cpu/o3/lsq_unit.hh Fri Feb 12 19:53:20 2010 +0000 >> @@ -216,12 +216,18 @@ >> /** Writes back the instruction, sending it to IEW. */ >> void writeback(DynInstPtr &inst, PacketPtr pkt); >> >> + /** Writes back a store that couldn't be completed the previous >> cycle. >> */ >> + void writebackPendingStore(); >> + >> /** Handles completing the send of a store to memory. */ >> void storePostSend(PacketPtr pkt); >> >> /** Completes the store at the specified index. */ >> void completeStore(int store_idx); >> >> + /** Attempts to send a store to the cache. */ >> _______________________________________________ >> m5-dev mailing list >> [email protected] >> http://m5sim.org/mailman/listinfo/m5-dev >> >> -- The University of Edinburgh is a charitable body, registered in Scotland, with registration number SC005336. _______________________________________________ m5-dev mailing list [email protected] http://m5sim.org/mailman/listinfo/m5-dev
