changeset b243dc8cec8b in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=b243dc8cec8b
description:
X86: Don't read in dest regs if all bits are replaced.
In x86, 32 and 64 bit writes to registers in which registers appear to
be 32 or
64 bits wide overwrite all bits of the destination register. This change
removes false dependencies in these cases where the previous value of a
register doesn't need to be read to write a new value. New versions of
most
microops are created that have a "Big" suffix which simply overwrite
their
destination, and the right version to use is selected during microop
allocation based on the selected data size.
This does not change the performance of the O3 CPU model significantly,
I
assume because there are other false dependencies from the condition
code bits
in the flags register.
diffstat:
src/arch/x86/isa/microops/ldstop.isa | 78 ++++++++-
src/arch/x86/isa/microops/limmop.isa | 27 ++-
src/arch/x86/isa/microops/regop.isa | 269 +++++++++++++++++++++++-----------
3 files changed, 266 insertions(+), 108 deletions(-)
diffs (truncated from 765 to 300 lines):
diff -r 0dff1ff293d0 -r b243dc8cec8b src/arch/x86/isa/microops/ldstop.isa
--- a/src/arch/x86/isa/microops/ldstop.isa Sun Feb 13 17:42:56 2011 -0800
+++ b/src/arch/x86/isa/microops/ldstop.isa Sun Feb 13 17:44:24 2011 -0800
@@ -301,6 +301,46 @@
"dataSize" : self.dataSize, "addressSize" : self.addressSize,
"memFlags" : self.memFlags}
return allocator
+
+ class BigLdStOp(X86Microop):
+ def __init__(self, data, segment, addr, disp,
+ dataSize, addressSize, baseFlags, atCPL0, prefetch):
+ self.data = data
+ [self.scale, self.index, self.base] = addr
+ self.disp = disp
+ self.segment = segment
+ self.dataSize = dataSize
+ self.addressSize = addressSize
+ self.memFlags = baseFlags
+ if atCPL0:
+ self.memFlags += " | (CPL0FlagBit << FlagShift)"
+ if prefetch:
+ self.memFlags += " | Request::PREFETCH"
+ self.memFlags += " | (machInst.legacy.addr ? " + \
+ "(AddrSizeFlagBit << FlagShift) : 0)"
+
+ def getAllocator(self, microFlags):
+ allocString = '''
+ (%(dataSize)s >= 4) ?
+ (StaticInstPtr)(new %(class_name)sBig(machInst,
+ macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
+ %(base)s, %(disp)s, %(segment)s, %(data)s,
+ %(dataSize)s, %(addressSize)s, %(memFlags)s)) :
+ (StaticInstPtr)(new %(class_name)s(machInst,
+ macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
+ %(base)s, %(disp)s, %(segment)s, %(data)s,
+ %(dataSize)s, %(addressSize)s, %(memFlags)s))
+ '''
+ allocator = allocString % {
+ "class_name" : self.className,
+ "flags" : self.microFlagsText(microFlags),
+ "scale" : self.scale, "index" : self.index,
+ "base" : self.base,
+ "disp" : self.disp,
+ "segment" : self.segment, "data" : self.data,
+ "dataSize" : self.dataSize, "addressSize" : self.addressSize,
+ "memFlags" : self.memFlags}
+ return allocator
}};
let {{
@@ -315,7 +355,8 @@
EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0);
'''
- def defineMicroLoadOp(mnemonic, code, mem_flags="0"):
+ def defineMicroLoadOp(mnemonic, code, bigCode='',
+ mem_flags="0", big=True):
global header_output
global decoder_output
global exec_output
@@ -324,16 +365,22 @@
name = mnemonic.lower()
# Build up the all register version of this micro op
- iop = InstObjParams(name, Name, 'X86ISA::LdStOp',
- {"code": code,
- "ea_code": calculateEA})
- header_output += MicroLdStOpDeclare.subst(iop)
- decoder_output += MicroLdStOpConstructor.subst(iop)
- exec_output += MicroLoadExecute.subst(iop)
- exec_output += MicroLoadInitiateAcc.subst(iop)
- exec_output += MicroLoadCompleteAcc.subst(iop)
+ iops = [InstObjParams(name, Name, 'X86ISA::LdStOp',
+ {"code": code, "ea_code": calculateEA})]
+ if big:
+ iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp',
+ {"code": bigCode, "ea_code": calculateEA})]
+ for iop in iops:
+ header_output += MicroLdStOpDeclare.subst(iop)
+ decoder_output += MicroLdStOpConstructor.subst(iop)
+ exec_output += MicroLoadExecute.subst(iop)
+ exec_output += MicroLoadInitiateAcc.subst(iop)
+ exec_output += MicroLoadCompleteAcc.subst(iop)
- class LoadOp(LdStOp):
+ base = LdStOp
+ if big:
+ base = BigLdStOp
+ class LoadOp(base):
def __init__(self, data, segment, addr, disp = 0,
dataSize="env.dataSize",
addressSize="env.addressSize",
@@ -346,12 +393,15 @@
microopClasses[name] = LoadOp
- defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);')
+ defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);',
+ 'Data = Mem & mask(dataSize * 8);')
defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);',
- '(StoreCheck << FlagShift)')
+ 'Data = Mem & mask(dataSize * 8);',
+ '(StoreCheck << FlagShift)')
defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);',
- '(StoreCheck << FlagShift) | Request::LOCKED')
- defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;')
+ 'Data = Mem & mask(dataSize * 8);',
+ '(StoreCheck << FlagShift) | Request::LOCKED')
+ defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False)
def defineMicroStoreOp(mnemonic, code, \
postCode="", completeCode="", mem_flags="0"):
diff -r 0dff1ff293d0 -r b243dc8cec8b src/arch/x86/isa/microops/limmop.isa
--- a/src/arch/x86/isa/microops/limmop.isa Sun Feb 13 17:42:56 2011 -0800
+++ b/src/arch/x86/isa/microops/limmop.isa Sun Feb 13 17:44:24 2011 -0800
@@ -114,8 +114,16 @@
self.dataSize = dataSize
def getAllocator(self, microFlags):
- allocator = '''new %(class_name)s(machInst, macrocodeBlock,
- %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % {
+ allocString = '''
+ (%(dataSize)s >= 4) ?
+ (StaticInstPtr)(new %(class_name)sBig(machInst,
+ macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
+ %(dataSize)s)) :
+ (StaticInstPtr)(new %(class_name)s(machInst,
+ macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
+ %(dataSize)s))
+ '''
+ allocator = allocString % {
"class_name" : self.className,
"mnemonic" : self.mnemonic,
"flags" : self.microFlagsText(microFlags),
@@ -152,12 +160,15 @@
let {{
# Build up the all register version of this micro op
- iop = InstObjParams("limm", "Limm", 'X86MicroopBase',
- {"code" : "DestReg = merge(DestReg, imm, dataSize);"})
- header_output += MicroLimmOpDeclare.subst(iop)
- decoder_output += MicroLimmOpConstructor.subst(iop)
- decoder_output += MicroLimmOpDisassembly.subst(iop)
- exec_output += MicroLimmOpExecute.subst(iop)
+ iops = [InstObjParams("limm", "Limm", 'X86MicroopBase',
+ {"code" : "DestReg = merge(DestReg, imm, dataSize);"}),
+ InstObjParams("limm", "LimmBig", 'X86MicroopBase',
+ {"code" : "DestReg = imm & mask(dataSize * 8);"})]
+ for iop in iops:
+ header_output += MicroLimmOpDeclare.subst(iop)
+ decoder_output += MicroLimmOpConstructor.subst(iop)
+ decoder_output += MicroLimmOpDisassembly.subst(iop)
+ exec_output += MicroLimmOpExecute.subst(iop)
iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase',
{"code" : "FpDestReg.uqw = imm"})
diff -r 0dff1ff293d0 -r b243dc8cec8b src/arch/x86/isa/microops/regop.isa
--- a/src/arch/x86/isa/microops/regop.isa Sun Feb 13 17:42:56 2011 -0800
+++ b/src/arch/x86/isa/microops/regop.isa Sun Feb 13 17:44:24 2011 -0800
@@ -224,8 +224,8 @@
MicroRegOpExecute)
class RegOpMeta(type):
- def buildCppClasses(self, name, Name, suffix, \
- code, flag_code, cond_check, else_code,
cond_control_flag_init):
+ def buildCppClasses(self, name, Name, suffix, code, big_code, \
+ flag_code, cond_check, else_code, cond_control_flag_init):
# Globals to stick the output in
global header_output
@@ -235,11 +235,13 @@
# Stick all the code together so it can be searched at once
allCode = "|".join((code, flag_code, cond_check, else_code,
cond_control_flag_init))
+ allBigCode = "|".join((big_code, flag_code, cond_check, else_code,
+ cond_control_flag_init))
# If op2 is used anywhere, make register and immediate versions
# of this code.
matcher =
re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
- match = matcher.search(allCode)
+ match = matcher.search(allCode + allBigCode)
if match:
typeQual = ""
if match.group("typeQual"):
@@ -247,6 +249,7 @@
src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
self.buildCppClasses(name, Name, suffix,
matcher.sub(src2_name, code),
+ matcher.sub(src2_name, big_code),
matcher.sub(src2_name, flag_code),
matcher.sub(src2_name, cond_check),
matcher.sub(src2_name, else_code),
@@ -254,6 +257,7 @@
imm_name = "%simm8" % match.group("prefix")
self.buildCppClasses(name + "i", Name, suffix + "Imm",
matcher.sub(imm_name, code),
+ matcher.sub(imm_name, big_code),
matcher.sub(imm_name, flag_code),
matcher.sub(imm_name, cond_check),
matcher.sub(imm_name, else_code),
@@ -264,27 +268,32 @@
# a version without it and fix up this version to use it.
if flag_code != "" or cond_check != "true":
self.buildCppClasses(name, Name, suffix,
- code, "", "true", else_code, "")
+ code, big_code, "", "true", else_code, "")
suffix = "Flags" + suffix
# If psrc1 or psrc2 is used, we need to actually insert code to
# compute it.
- matcher = re.compile("(?<!\w)psrc1(?!\w)")
- if matcher.search(allCode):
- code = "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);" + code
- matcher = re.compile("(?<!\w)psrc2(?!\w)")
- if matcher.search(allCode):
- code = "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);" + code
- # Also make available versions which do sign extension
- matcher = re.compile("(?<!\w)spsrc1(?!\w)")
- if matcher.search(allCode):
- code = "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);" +
code
- matcher = re.compile("(?<!\w)spsrc2(?!\w)")
- if matcher.search(allCode):
- code = "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);" +
code
- matcher = re.compile("(?<!\w)simm8(?!\w)")
- if matcher.search(allCode):
- code = "int8_t simm8 = imm8;" + code
+ for (big, all) in ((False, allCode), (True, allBigCode)):
+ prefix = ""
+ for (rex, decl) in (
+ ("(?<!\w)psrc1(?!\w)",
+ "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);"),
+ ("(?<!\w)psrc2(?!\w)",
+ "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);"),
+ ("(?<!\w)spsrc1(?!\w)",
+ "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);"),
+ ("(?<!\w)spsrc2(?!\w)",
+ "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);"),
+ ("(?<!\w)simm8(?!\w)",
+ "int8_t simm8 = imm8;")):
+ matcher = re.compile(rex)
+ if matcher.search(all):
+ prefix += decl + "\n"
+ if big:
+ if big_code != "":
+ big_code = prefix + big_code
+ else:
+ code = prefix + code
base = "X86ISA::RegOp"
@@ -297,17 +306,26 @@
templates = immTemplates
# Get everything ready for the substitution
- iop = InstObjParams(name, Name + suffix, base,
+ iops = [InstObjParams(name, Name + suffix, base,
{"code" : code,
"flag_code" : flag_code,
"cond_check" : cond_check,
"else_code" : else_code,
- "cond_control_flag_init": cond_control_flag_init})
+ "cond_control_flag_init" : cond_control_flag_init})]
+ if big_code != "":
+ iops += [InstObjParams(name, Name + suffix + "Big", base,
+ {"code" : big_code,
+ "flag_code" : flag_code,
+ "cond_check" : cond_check,
+ "else_code" : else_code,
+ "cond_control_flag_init" :
+ cond_control_flag_init})]
# Generate the actual code (finally!)
- header_output += templates[0].subst(iop)
- decoder_output += templates[1].subst(iop)
- exec_output += templates[2].subst(iop)
+ for iop in iops:
+ header_output += templates[0].subst(iop)
+ decoder_output += templates[1].subst(iop)
+ exec_output += templates[2].subst(iop)
def __new__(mcls, Name, bases, dict):
@@ -322,14 +340,16 @@
cls.className = Name
cls.base_mnemonic = name
code = cls.code
+ big_code = cls.big_code
flag_code = cls.flag_code
cond_check = cls.cond_check
else_code = cls.else_code
cond_control_flag_init = cls.cond_control_flag_init
# Set up the C++ classes
- mcls.buildCppClasses(cls, name, Name, "", code, flag_code,
- cond_check, else_code, cond_control_flag_init)
+ mcls.buildCppClasses(cls, name, Name, "", code, big_code,
+ flag_code, cond_check, else_code,
+ cond_control_flag_init)
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev