Jason Lowe-Power has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/27972 )

Change subject: arch-x86,cpu: Fix bpred by annotating branch instructions in x86
......................................................................

arch-x86,cpu: Fix bpred by annotating branch instructions in x86

Original Creator: Adria Armejach.

Branch instructions needed to be annotated in x86 as direct/indirect and conditional/unconditional. These annotations where not present causing the branch predictor to misbehave, not using the BTB. In addition, logic to determine the real branch target at decode needed to be added as it was also missing.

Change-Id: I91e707452c1825b9bb4ae75c3f599da489ae5b9a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27972
Reviewed-by: Gabe Black <gabebl...@google.com>
Maintainer: Gabe Black <gabebl...@google.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/x86/isa/insts/general_purpose/control_transfer/call.py
M src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py
M src/arch/x86/isa/insts/general_purpose/control_transfer/jump.py
M src/arch/x86/isa/insts/general_purpose/control_transfer/loop.py
M src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py
M src/arch/x86/isa/macroop.isa
M src/arch/x86/isa/microops/regop.isa
M src/arch/x86/isa/microops/seqop.isa
M src/cpu/o3/decode_impl.hh
9 files changed, 113 insertions(+), 6 deletions(-)

Approvals:
  Gabe Black: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py
index c58152c..edc0007 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py
+++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py
@@ -41,6 +41,7 @@
     # Make the default data size of calls 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_call
+    .control_direct

     limm t1, imm
     rdip t7
@@ -55,6 +56,7 @@
     # Make the default data size of calls 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_call
+    .control_indirect

     rdip t1
     # Check target of call
@@ -68,6 +70,7 @@
     # Make the default data size of calls 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_call
+    .control_indirect

     rdip t7
     ld t1, seg, sib, disp
@@ -82,6 +85,7 @@
     # Make the default data size of calls 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_call
+    .control_indirect

     rdip t7
     ld t1, seg, riprel, disp
diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py
index 87b2e6a..f92935d 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py
@@ -40,6 +40,7 @@
 {
     # Make the defualt data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -50,6 +51,7 @@
 {
     # Make the defualt data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -60,6 +62,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -70,6 +73,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -80,6 +84,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -90,6 +95,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -100,6 +106,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -110,6 +117,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -120,6 +128,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -130,6 +139,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -140,6 +150,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -150,6 +161,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -160,6 +172,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -170,6 +183,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -180,6 +194,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -190,6 +205,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -198,6 +214,8 @@

 def macroop JRCX_I
 {
+    .control_direct
+
     rdip t1
     add t0, t0, rcx, flags=(EZF,), dataSize=asz
     wripi t1, imm, flags=(CEZF,)
diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/jump.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/jump.py
index 6f419ce..4d76395 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/jump.py
+++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/jump.py
@@ -41,6 +41,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_direct

     rdip t1
     limm t2, imm
@@ -51,6 +52,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_indirect

     wripi reg, 0
 };
@@ -59,6 +61,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_indirect

     ld t1, seg, sib, disp
     wripi t1, 0
@@ -68,6 +71,7 @@
 {
     # Make the default data size of jumps 64 bits in 64 bit mode
     .adjust_env oszIn64Override
+    .control_indirect

     rdip t7
     ld t1, seg, riprel, disp
@@ -140,6 +144,8 @@

 def macroop JMP_FAR_REAL_M
 {
+    .control_indirect
+
     lea t1, seg, sib, disp, dataSize=asz
     ld t2, seg, [1, t0, t1], dsz
     ld t1, seg, [1, t0, t1]
@@ -152,11 +158,14 @@

 def macroop JMP_FAR_REAL_P
 {
+    .control_indirect
     panic "Real mode far jump executed in 64 bit mode!"
 };

 def macroop JMP_FAR_REAL_I
 {
+    .control_indirect
+
     # Put the whole far pointer into a register.
     limm t2, imm, dataSize=8
     # Figure out the width of the offset.
diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/loop.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/loop.py
index 919551e..7ab034e 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/loop.py
+++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/loop.py
@@ -37,6 +37,8 @@

 microcode = '''
 def macroop LOOP_I {
+    .control_direct
+
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     rdip t1
@@ -45,6 +47,8 @@
 };

 def macroop LOOPNE_I {
+    .control_direct
+
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     rdip t1
@@ -53,6 +57,8 @@
 };

 def macroop LOOPE_I {
+    .control_direct
+
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     rdip t1
diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py
index f24b9d7..25ddaad 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py
+++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py
@@ -41,6 +41,7 @@
     # Make the default data size of rets 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_return
+    .control_indirect

     ld t1, ss, [1, t0, rsp]
     # Check address of return
@@ -53,6 +54,7 @@
     # Make the default data size of rets 64 bits in 64 bit mode
     .adjust_env oszIn64Override
     .function_return
+    .control_indirect

     limm t2, imm
     ld t1, ss, [1, t0, rsp]
@@ -65,6 +67,7 @@
 def macroop RET_FAR {
     .adjust_env oszIn64Override
     .function_return
+    .control_indirect

     # Get the return RIP
     ld t1, ss, [1, t0, rsp]
diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa
index 99d76b4..3a4ccbd 100644
--- a/src/arch/x86/isa/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -153,6 +153,10 @@
             self.function_call = True
         def function_return(self):
             self.function_return = True
+        def control_direct(self):
+            self.control_direct = True
+        def control_indirect(self):
+            self.control_indirect = True

         def __init__(self, name):
             super(X86Macroop, self).__init__(name)
@@ -163,7 +167,9 @@
                 "serialize_before" : self.serializeBefore,
                 "serialize_after" : self.serializeAfter,
                 "function_call" : self.function_call,
-                "function_return" : self.function_return
+                "function_return" : self.function_return,
+                "control_direct" : self.control_direct,
+                "control_indirect" : self.control_indirect
             }
             self.declared = False
             self.adjust_env = ""
@@ -182,6 +188,8 @@
             self.serialize_after = False
             self.function_call = False
             self.function_return = False
+            self.control_direct = False
+            self.control_indirect = False

         def getAllocator(self, env):
             return "new X86Macroop::%s(machInst, %s)" % \
@@ -230,6 +238,10 @@
                     if self.function_return:
                         flags.append("IsReturn")
                         flags.append("IsUncondControl")
+                    if self.control_direct:
+                        flags.append("IsDirectControl")
+                    if self.control_indirect:
+                        flags.append("IsIndirectControl")
                 else:
                     flags.append("IsDelayedCommit")

diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index 6f2901b..3e393cf 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -112,6 +112,12 @@
                 uint8_t _dataSize, uint16_t _ext);

         Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+        override;
+
+        /// Explicitly import the otherwise hidden branchTarget
+        using StaticInst::branchTarget;
     };
 }};

@@ -126,6 +132,12 @@
                 uint8_t _dataSize, uint16_t _ext);

         Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+        override;
+
+        /// Explicitly import the otherwise hidden branchTarget
+        using StaticInst::branchTarget;
     };
 }};

@@ -141,6 +153,17 @@
         %(constructor)s;
         %(cond_control_flag_init)s;
     }
+
+    X86ISA::PCState
+    %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+    {
+        X86ISA::PCState pcs = branchPC;
+        DPRINTF(X86, "branchTarget PC info: %s, Immediate: %lx\n",
+                pcs, (int64_t) this->machInst.immediate);
+        pcs.npc(pcs.npc() + (int64_t) this->machInst.immediate);
+        pcs.uEnd();
+        return pcs;
+    }
 }};

 def template MicroRegOpImmConstructor {{
@@ -155,6 +178,17 @@
         %(constructor)s;
         %(cond_control_flag_init)s;
     }
+
+    X86ISA::PCState
+    %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+    {
+        X86ISA::PCState pcs = branchPC;
+        DPRINTF(X86, "branchTarget PC info: %s, Immediate (imm8): %lx\n",
+                pcs, (int8_t)imm8);
+        pcs.npc(pcs.npc() + (int8_t)imm8);
+        pcs.uEnd();
+        return pcs;
+    }
 }};

 output header {{
@@ -275,7 +309,8 @@
             # a version without it and fix up this version to use it.
             if flag_code != "" or cond_check != "true":
                 self.buildCppClasses(name, Name, suffix,
- code, big_code, "", "true", else_code, "", op_class)
+                        code, big_code, "", "true", else_code,
+ "flags[IsUncondControl] = flags[IsControl];", op_class)
                 suffix = "Flags" + suffix

             # If psrc1 or psrc2 is used, we need to actually insert code to
diff --git a/src/arch/x86/isa/microops/seqop.isa b/src/arch/x86/isa/microops/seqop.isa
index f5cb589..65bf064 100644
--- a/src/arch/x86/isa/microops/seqop.isa
+++ b/src/arch/x86/isa/microops/seqop.isa
@@ -64,6 +64,12 @@
                 uint64_t setFlags, uint16_t _target, uint8_t _cc);

         Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+        X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+        override;
+
+        /// Explicitly import the otherwise hidden branchTarget
+        using StaticInst::branchTarget;
     };
 }};

@@ -103,6 +109,17 @@
         %(constructor)s;
         %(cond_control_flag_init)s;
     }
+
+    X86ISA::PCState
+    %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+    {
+        X86ISA::PCState pcs = branchPC;
+        DPRINTF(X86, "Br branchTarget PC info: %s, Target: %d\n",
+                pcs, (int16_t)target);
+        pcs.nupc(target);
+        pcs.uAdvance();
+        return pcs;
+    }
 }};

 output decoder {{
@@ -174,7 +191,8 @@
              "else_code": "nuIP = nuIP;",
              "cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
                                           ecfBit | ezfBit, cc)",
-             "cond_control_flag_init": "flags[IsCondControl] = true"})
+             "cond_control_flag_init": "flags[IsCondControl] = true; \
+             flags[IsDirectControl] = true;"})
     exec_output += SeqOpExecute.subst(iop)
     header_output += SeqOpDeclare.subst(iop)
     decoder_output += SeqOpConstructor.subst(iop)
@@ -192,7 +210,8 @@
             {"code": "", "else_code": "",
              "cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
                                           ecfBit | ezfBit, cc)",
-             "cond_control_flag_init": ""})
+             "cond_control_flag_init": "flags[IsUncondControl] = true;\
+             flags[IsDirectControl] = true;"})
     exec_output += SeqOpExecute.subst(iop)
     header_output += SeqOpDeclare.subst(iop)
     decoder_output += SeqOpConstructor.subst(iop)
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 86f9339..561b482 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -749,8 +749,9 @@

                 DPRINTF(Decode,
                         "[tid:%i] [sn:%llu] "
-                        "Updating predictions: PredPC: %s\n",
-                        tid, inst->seqNum, target);
+                        "Updating predictions: Wrong predicted target: %s \
+                        PredPC: %s\n",
+                        tid, inst->seqNum, inst->readPredTarg(), target);
//The micro pc after an instruction level branch should be 0
                 inst->setPredTarg(target);
                 break;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/27972
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I91e707452c1825b9bb4ae75c3f599da489ae5b9a
Gerrit-Change-Number: 27972
Gerrit-PatchSet: 3
Gerrit-Owner: Juan Manuel Cebrián González <jm.cebriangonza...@gmail.com>
Gerrit-Reviewer: Gabe Black <gabebl...@google.com>
Gerrit-Reviewer: Jason Lowe-Power <power...@gmail.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to