persona0220 updated this revision to Diff 443281.
persona0220 marked 4 inline comments as done.
persona0220 added a comment.
Herald added a subscriber: mgorny.

- Delete PTI_MAP enum and use opcode_len instead.
- Add a unit test
  - Divide the existing Disassembler tests into x86 / ARM subdirectories
  - lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp tests the
    categorization of x86_64 instructions
- Modify the GetControlFlowKind function to take an ArchSpec instead of an exe_ctx


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128477/new/

https://reviews.llvm.org/D128477

Files:
  lldb/include/lldb/Core/Disassembler.h
  lldb/include/lldb/Target/TraceDumper.h
  lldb/include/lldb/lldb-enumerations.h
  lldb/source/API/SBInstruction.cpp
  lldb/source/API/SBInstructionList.cpp
  lldb/source/Commands/CommandObjectDisassemble.cpp
  lldb/source/Commands/CommandObjectDisassemble.h
  lldb/source/Commands/CommandObjectThread.cpp
  lldb/source/Commands/Options.td
  lldb/source/Core/Disassembler.cpp
  lldb/source/Core/DumpDataExtractor.cpp
  lldb/source/Expression/IRExecutionUnit.cpp
  lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
  lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
  lldb/source/Symbol/Function.cpp
  lldb/source/Symbol/Symbol.cpp
  lldb/source/Target/ThreadPlanTracer.cpp
  lldb/source/Target/TraceDumper.cpp
  lldb/unittests/Disassembler/ARM/CMakeLists.txt
  lldb/unittests/Disassembler/ARM/TestArm64Disassembly.cpp
  lldb/unittests/Disassembler/ARM/TestArmv7Disassembly.cpp
  lldb/unittests/Disassembler/CMakeLists.txt
  lldb/unittests/Disassembler/TestArm64Disassembly.cpp
  lldb/unittests/Disassembler/TestArmv7Disassembly.cpp
  lldb/unittests/Disassembler/x86/CMakeLists.txt
  lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp

Index: lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
===================================================================
--- /dev/null
+++ lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
@@ -0,0 +1,147 @@
+//===-- TestGetControlFlowKindx86.cpp -------------------------------------===//
+
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+#include "lldb/Core/Address.h"
+#include "lldb/Core/Disassembler.h"
+#include "lldb/Target/ExecutionContext.h"
+#include "lldb/Utility/ArchSpec.h"
+
+#include "Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+class TestGetControlFlowKindx86 : public testing::Test {
+public:
+  static void SetUpTestCase();    // suite-level setup hook
+  static void TearDownTestCase(); // suite-level teardown hook
+
+  // No per-test SetUp()/TearDown() overrides are declared; this fixture only
+  // relies on the two static suite-level hooks above.
+
+protected:
+};
+
+void TestGetControlFlowKindx86::SetUpTestCase() { // suite-level setup
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllDisassemblers();
+  DisassemblerLLVMC::Initialize(); // paired with Terminate() in TearDownTestCase
+}
+
+void TestGetControlFlowKindx86::TearDownTestCase() { // suite-level teardown
+  DisassemblerLLVMC::Terminate(); // pairs with Initialize() in SetUpTestCase
+}
+
+TEST_F(TestGetControlFlowKindx86, TestX86_64Instruction) {
+  ArchSpec arch("x86_64-*-linux");
+
+  const unsigned num_of_instructions = 29; // entries in data[] and result[]
+  uint8_t data[] = {
+      0x55,                               // other -- pushq %rbp
+      0x48, 0x89, 0xe5,                   // other -- movq %rsp, %rbp
+
+      0xe8, 0xfc, 0xfe, 0xff, 0xff,       // call -- callq 0x4004c0
+      0x41, 0xff, 0x14, 0xdc,             // call -- callq *(%r12,%rbx,8)
+      0xff, 0x50, 0x18,                   // call -- callq *0x18(%rax)
+      0xe8, 0x48, 0x0d, 0x00, 0x00,       // call -- callq 0x94fe0
+
+      0xc3,                               // return -- retq
+
+      0xeb, 0xd3,                         // jump -- jmp 0x92dab
+      0xe9, 0x22, 0xff, 0xff, 0xff,       // jump -- jmp 0x933ae
+      0xff, 0xe0,                         // jump -- jmpq *%rax
+      0xf2, 0xff, 0x25, 0x75, 0xe7, 0x39, 0x00, // jump -- repne jmpq *0x39e775
+
+      0x73, 0xc2,                         // cond jump -- jae 0x9515c
+      0x74, 0x1f,                         // cond jump -- je 0x400626
+      0x75, 0xea,                         // cond jump -- jne 0x400610
+      0x76, 0x10,                         // cond jump -- jbe 0x94d10
+      0x77, 0x58,                         // cond jump -- ja 0x1208c8
+      0x7e, 0x67,                         // cond jump -- jle 0x92180
+      0x78, 0x0b,                         // cond jump -- js 0x92dc3
+      0x0f, 0x82, 0x17, 0x01, 0x00, 0x00, // cond jump -- jb 0x9c7b0
+      0x0f, 0x83, 0xa7, 0x00, 0x00, 0x00, // cond jump -- jae 0x895c8
+      0x0f, 0x84, 0x8c, 0x00, 0x00, 0x00, // cond jump -- je 0x941f0
+      0x0f, 0x85, 0x51, 0xff, 0xff, 0xff, // cond jump -- jne 0x8952c
+      0x0f, 0x86, 0xa3, 0x02, 0x00, 0x00, // cond jump -- jbe 0x9ae10
+      0x0f, 0x87, 0xff, 0x00, 0x00, 0x00, // cond jump -- ja 0x9ab60
+      0x0f, 0x8e, 0x7e, 0x00, 0x00, 0x00, // cond jump -- jle 0x92dd8
+      0x0f, 0x86, 0xdf, 0x00, 0x00, 0x00, // cond jump -- jbe 0x921b0
+
+      0x0f, 0x05,                         // far call -- syscall
+
+      0x0f, 0x07,                         // far return -- sysret
+      0xcf,                               // far return -- interrupt ret
+  };
+
+  InstructionControlFlowKind result[] = {
+      eInstructionControlFlowKindOther,
+      eInstructionControlFlowKindOther,
+
+      eInstructionControlFlowKindCall,
+      eInstructionControlFlowKindCall,
+      eInstructionControlFlowKindCall,
+      eInstructionControlFlowKindCall,
+
+      eInstructionControlFlowKindReturn,
+
+      eInstructionControlFlowKindJump,
+      eInstructionControlFlowKindJump,
+      eInstructionControlFlowKindJump,
+      eInstructionControlFlowKindJump,
+
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+      eInstructionControlFlowKindCondJump,
+
+      eInstructionControlFlowKindFarCall,
+
+      eInstructionControlFlowKindFarReturn,
+      eInstructionControlFlowKindFarReturn,
+  };
+
+  DisassemblerSP disass_sp;
+  Address start_addr(0x100);
+  disass_sp =
+      Disassembler::DisassembleBytes(arch, nullptr, nullptr, start_addr, &data,
+                                    sizeof (data), num_of_instructions, false);
+
+  // If we failed to get a disassembler, we can assume it is because
+  // the llvm we linked against was not built with the X86 target,
+  // and we should skip these checks without marking anything as failing.
+
+  if (disass_sp) {
+    const InstructionList inst_list(disass_sp->GetInstructionList());
+    EXPECT_EQ(num_of_instructions, inst_list.GetSize()); // NOTE(review): non-fatal; loop below still runs
+
+    for (size_t i = 0; i < num_of_instructions; ++i) {
+      InstructionSP inst_sp;
+      inst_sp = inst_list.GetInstructionAtIndex(i);
+      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(arch);
+      EXPECT_EQ(kind, result[i]); // i-th decoded instruction vs. i-th expected kind
+    }
+  }
+}
Index: lldb/unittests/Disassembler/x86/CMakeLists.txt
===================================================================
--- /dev/null
+++ lldb/unittests/Disassembler/x86/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_lldb_unittest(GetControlFlowKindx86Tests # x86 control-flow-kind unit test
+    TestGetControlFlowKindx86.cpp
+    LINK_LIBS
+      lldbCore
+      lldbSymbol
+      lldbTarget
+      lldbPluginDisassemblerLLVMC
+      lldbPluginProcessUtility
+    LINK_COMPONENTS
+      Support
+      ${LLVM_TARGETS_TO_BUILD}
+  )
Index: lldb/unittests/Disassembler/CMakeLists.txt
===================================================================
--- lldb/unittests/Disassembler/CMakeLists.txt
+++ lldb/unittests/Disassembler/CMakeLists.txt
@@ -1,14 +1,7 @@
 if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD)
-  add_lldb_unittest(DisassemblerTests
-    TestArm64Disassembly.cpp
-    TestArmv7Disassembly.cpp
-    LINK_LIBS
-      lldbCore
-      lldbSymbol
-      lldbTarget
-      lldbPluginDisassemblerLLVMC
-      lldbPluginProcessUtility
-    LINK_COMPONENTS
-      Support
-      ${LLVM_TARGETS_TO_BUILD})
+  add_subdirectory(ARM)
+endif()
+
+if("X86" IN_LIST LLVM_TARGETS_TO_BUILD)
+  add_subdirectory(x86)
 endif()
Index: lldb/unittests/Disassembler/ARM/CMakeLists.txt
===================================================================
--- /dev/null
+++ lldb/unittests/Disassembler/ARM/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_lldb_unittest(DisassemblerTests # ARM tests moved from the parent directory
+  TestArm64Disassembly.cpp
+  TestArmv7Disassembly.cpp
+  LINK_LIBS
+    lldbCore
+    lldbSymbol
+    lldbTarget
+    lldbPluginDisassemblerLLVMC
+    lldbPluginProcessUtility
+  LINK_COMPONENTS
+    Support
+    ${LLVM_TARGETS_TO_BUILD})
Index: lldb/source/Target/TraceDumper.cpp
===================================================================
--- lldb/source/Target/TraceDumper.cpp
+++ lldb/source/Target/TraceDumper.cpp
@@ -147,14 +147,14 @@
       m_s.Format("{0:x+16}", item.load_address);
       if (item.symbol_info) {
         m_s << "    ";
-        item.symbol_info->instruction->Dump(&m_s, /*max_opcode_byte_size=*/0,
-                                            /*show_address=*/false,
-                                            /*show_bytes=*/false,
-                                            &item.symbol_info->exe_ctx,
-                                            &item.symbol_info->sc,
-                                            /*prev_sym_ctx=*/nullptr,
-                                            /*disassembly_addr_format=*/nullptr,
-                                            /*max_address_text_size=*/0);
+        item.symbol_info->instruction->Dump(
+            &m_s, /*max_opcode_byte_size=*/0,
+            /*show_address=*/false,
+            /*show_bytes=*/false, m_options.show_control_flow_kind,
+            &item.symbol_info->exe_ctx, &item.symbol_info->sc,
+            /*prev_sym_ctx=*/nullptr,
+            /*disassembly_addr_format=*/nullptr,
+            /*max_address_text_size=*/0);
       }
     }
 
Index: lldb/source/Target/ThreadPlanTracer.cpp
===================================================================
--- lldb/source/Target/ThreadPlanTracer.cpp
+++ lldb/source/Target/ThreadPlanTracer.cpp
@@ -170,13 +170,14 @@
       if (instruction_list.GetSize()) {
         const bool show_bytes = true;
         const bool show_address = true;
+        const bool show_control_flow_kind = true;
         Instruction *instruction =
             instruction_list.GetInstructionAtIndex(0).get();
         const FormatEntity::Entry *disassemble_format =
             m_process.GetTarget().GetDebugger().GetDisassemblyFormat();
         instruction->Dump(stream, max_opcode_byte_size, show_address,
-                          show_bytes, nullptr, nullptr, nullptr,
-                          disassemble_format, 0);
+                          show_bytes, show_control_flow_kind, nullptr, nullptr,
+                          nullptr, disassemble_format, 0);
       }
     }
   }
Index: lldb/source/Symbol/Symbol.cpp
===================================================================
--- lldb/source/Symbol/Symbol.cpp
+++ lldb/source/Symbol/Symbol.cpp
@@ -558,8 +558,9 @@
   if (disassembler_sp) {
     const bool show_address = true;
     const bool show_bytes = false;
-    disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes,
-                                               &exe_ctx);
+    const bool show_control_flow_kind = false;
+    disassembler_sp->GetInstructionList().Dump(
+        &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx);
     return true;
   }
   return false;
Index: lldb/source/Symbol/Function.cpp
===================================================================
--- lldb/source/Symbol/Function.cpp
+++ lldb/source/Symbol/Function.cpp
@@ -440,8 +440,9 @@
   if (disassembler_sp) {
     const bool show_address = true;
     const bool show_bytes = false;
-    disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes,
-                                               &exe_ctx);
+    const bool show_control_flow_kind = false;
+    disassembler_sp->GetInstructionList().Dump(
+        &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx);
     return true;
   }
   return false;
Index: lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
===================================================================
--- lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -83,6 +83,7 @@
       const uint32_t addr_byte_size = m_arch.GetAddressByteSize();
       const bool show_address = true;
       const bool show_bytes = true;
+      const bool show_control_flow_kind = true;
       m_inst_emulator_up->GetRegisterInfo(unwind_plan.GetRegisterKind(),
                                           unwind_plan.GetInitialCFARegister(),
                                           m_cfa_reg_info);
@@ -244,7 +245,8 @@
               lldb_private::FormatEntity::Entry format;
               FormatEntity::Parse("${frame.pc}: ", format);
               inst->Dump(&strm, inst_list.GetMaxOpcocdeByteSize(), show_address,
-                         show_bytes, nullptr, nullptr, nullptr, &format, 0);
+                         show_bytes, show_control_flow_kind, nullptr, nullptr,
+                         nullptr, &format, 0);
               log->PutString(strm.GetString());
             }
 
Index: lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
===================================================================
--- lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
+++ lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
@@ -158,15 +158,15 @@
       cursor.Next();
     } else {
       lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
-      lldb::TraceInstructionControlFlowType current_instruction_type =
-          cursor.GetInstructionControlFlowType();
+      lldb::InstructionControlFlowKind current_instruction_type =
+          cursor.GetInstructionControlFlowKind();
 
       m_instruction_layer_up->AppendInstruction(
           current_instruction_load_address);
       cursor.Next();
       bool more_data_in_trace = cursor.HasValue();
       if (current_instruction_type &
-          lldb::eTraceInstructionControlFlowTypeCall) {
+          lldb::eInstructionControlFlowKindCall) {
         if (more_data_in_trace && !cursor.IsError()) {
           m_instruction_layer_up->AddCallInstructionMetadata(
               current_instruction_load_address,
Index: lldb/source/Expression/IRExecutionUnit.cpp
===================================================================
--- lldb/source/Expression/IRExecutionUnit.cpp
+++ lldb/source/Expression/IRExecutionUnit.cpp
@@ -200,7 +200,9 @@
                                       UINT32_MAX, false, false);
 
   InstructionList &instruction_list = disassembler_sp->GetInstructionList();
-  instruction_list.Dump(&stream, true, true, &exe_ctx);
+  instruction_list.Dump(&stream, true, true, /*show_control_flow_kind=*/true,
+                        &exe_ctx);
+
   return ret;
 }
 
Index: lldb/source/Core/DumpDataExtractor.cpp
===================================================================
--- lldb/source/Core/DumpDataExtractor.cpp
+++ lldb/source/Core/DumpDataExtractor.cpp
@@ -170,10 +170,11 @@
         offset += bytes_consumed;
         const bool show_address = base_addr != LLDB_INVALID_ADDRESS;
         const bool show_bytes = true;
+        const bool show_control_flow_kind = true;
         ExecutionContext exe_ctx;
         exe_scope->CalculateExecutionContext(exe_ctx);
-        disassembler_sp->GetInstructionList().Dump(s, show_address, show_bytes,
-                                                   &exe_ctx);
+        disassembler_sp->GetInstructionList().Dump(
+            s, show_address, show_bytes, show_control_flow_kind, &exe_ctx);
       }
     }
   } else
Index: lldb/source/Core/Disassembler.cpp
===================================================================
--- lldb/source/Core/Disassembler.cpp
+++ lldb/source/Core/Disassembler.cpp
@@ -527,8 +527,11 @@
       }
 
       const bool show_bytes = (options & eOptionShowBytes) != 0;
-      inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
-                 &prev_sc, nullptr, address_text_size);
+      const bool show_control_flow_kind =
+          (options & eOptionShowControlFlowKind) != 0;
+      inst->Dump(&strm, max_opcode_byte_size, true, show_bytes,
+                 show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr,
+                 address_text_size);
       strm.EOL();
     } else {
       break;
@@ -568,6 +571,328 @@
 
 Instruction::~Instruction() = default;
 
+namespace x86 {
+
+/// Classify an x86 instruction into an InstructionControlFlowKind from its
+/// already-decoded primary opcode, ModR/M byte and opcode length.
+/// Refer to http://ref.x86asm.net/coder.html for the full list of opcodes
+/// and instructions.
+///
+/// \param[in] opcode
+///    Primary opcode of the instruction.
+///
+/// \param[in] modrm
+///    ModR/M byte; only consulted for opcode 0xFF and two-byte opcode 0x01.
+///
+/// \param[in] opcode_len
+///    Length of the opcode. Only lengths 1 and 2 are ever classified.
+///
+/// \return
+///   The control flow kind of the instruction, or
+///   eInstructionControlFlowKindOther if the instruction is not interesting,
+///   i.e. a sequential instruction that doesn't affect the control flow of
+///   the program.
+lldb::InstructionControlFlowKind
+MapOpcodeIntoControlFlowKind(uint8_t opcode, uint8_t modrm,
+                             uint8_t opcode_len) {
+  if (opcode_len > 2)
+    return lldb::eInstructionControlFlowKindOther;
+
+  if (opcode >= 0x70 && opcode <= 0x7F) { // one-byte Jcc rel8 (e.g. 0x74 je)
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  if (opcode >= 0x80 && opcode <= 0x8F) { // 0x0F-escaped Jcc (e.g. 0x0F 0x84 je)
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  switch (opcode) {
+  case 0x9A: // callf with an immediate far pointer
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xFF: // opcode group; the operation is selected by ModR/M bits [5:3]
+    if (opcode_len == 1) {
+      uint8_t modrm_reg = (modrm >> 3) & 7;
+      if (modrm_reg == 2) // call r/m
+        return lldb::eInstructionControlFlowKindCall;
+      else if (modrm_reg == 3) // callf m
+        return lldb::eInstructionControlFlowKindFarCall;
+      else if (modrm_reg == 4) // jmp r/m
+        return lldb::eInstructionControlFlowKindJump;
+      else if (modrm_reg == 5) // jmpf m
+        return lldb::eInstructionControlFlowKindFarJump;
+    }
+    break;
+  case 0xE8: // call rel
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCall;
+    break;
+  case 0xCD: // int imm8
+  case 0xCC: // int3
+  case 0xCE: // into
+  case 0xF1: // int1
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xCF: // iret
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0xE9: // jmp rel16/32
+  case 0xEB: // jmp rel8
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindJump;
+    break;
+  case 0xEA: // jmpf with an immediate far pointer
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarJump;
+    break;
+  case 0xE3: // jcxz / jecxz / jrcxz
+  case 0xE0: // loopne
+  case 0xE1: // loope
+  case 0xE2: // loop
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    break;
+  case 0xC3: // ret
+  case 0xC2: // ret imm16
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindReturn;
+    break;
+  case 0xCB: // retf
+  case 0xCA: // retf imm16
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x05: // 0x0F 0x05: syscall
+  case 0x34: // 0x0F 0x34: sysenter
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0x35: // 0x0F 0x35: sysexit
+  case 0x07: // 0x0F 0x07: sysret
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x01: // 0x0F 0x01: the full ModR/M byte selects the instruction
+    if (opcode_len == 2) {
+      switch (modrm) {
+      case 0xc1: // vmcall
+        return lldb::eInstructionControlFlowKindFarCall;
+      case 0xc2: // vmlaunch
+      case 0xc3: // vmresume
+        return lldb::eInstructionControlFlowKindFarReturn;
+      default:
+        break;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  return lldb::eInstructionControlFlowKindOther;
+}
+
+/// Decode an instruction into its primary opcode, ModR/M byte and opcode_len.
+/// Refer to http://ref.x86asm.net/coder.html for the instruction byte layout.
+/// Opcodes in x86 are generally the first byte of instruction, though two-byte
+/// instructions and prefixes exist. ModR/M is the byte following the opcode
+/// and adds additional information for how the instruction is executed.
+///
+/// \param[in] inst_bytes
+///    Raw bytes of the instruction.
+///
+/// \param[in] is_exec_mode_64b
+///    If true, the execution mode is 64 bit (0x40-0x4f are then REX prefixes).
+///
+/// \param[in] bytes_len
+///    The length of the inst_bytes array; only checked during the prefix scan.
+///
+/// \param[out] primary_opcode
+///    Primary opcode: the first byte after any prefixes for one-byte opcodes,
+///    the byte after 0x0F for two-byte opcodes, and the byte after 0x0F 0x38
+///    or 0x0F 0x3A for those three-byte opcodes.
+///
+/// \param[out] modrm
+///    The byte following the primary opcode, interpreted as ModR/M.
+///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
+///    may contain a register or specify an addressing mode, depending on MOD.
+///
+/// \param[out] opcode_len
+///    1, 2 or 3 for legacy encodings; 0 for other 0x0F 0x38..0x3F escapes.
+///
+/// \return
+///    False only if the prefix scan runs past bytes_len; true otherwise.
+bool InstructionLengthDecode(const uint8_t *inst_bytes, bool is_exec_mode_64b,
+                             int bytes_len, uint8_t *primary_opcode,
+                             uint8_t *modrm, uint8_t *opcode_len) {
+  int op_idx = 0;
+  bool prefix_done = false;
+
+  // In most cases, the primary_opcode is the first byte of the instruction
+  // but some instructions have a prefix to be skipped for these calculations.
+  // The following mapping is inspired by libipt's instruction decoding logic
+  // in `src/pt_ild.c`.
+  *opcode_len = 0;
+  while (!prefix_done) {
+    if (op_idx >= bytes_len) // ran out of bytes while still skipping prefixes
+      return false;
+
+    *primary_opcode = inst_bytes[op_idx];
+    switch (*primary_opcode) {
+    // prefix_ignore (segment-override prefixes)
+    case 0x26:
+    case 0x2e:
+    case 0x36:
+    case 0x3e:
+    case 0x64:
+    case 0x65:
+    // prefix_osz, prefix_asz (operand-size / address-size overrides)
+    case 0x66:
+    case 0x67:
+    // prefix_lock, prefix_f2, prefix_f3
+    case 0xf0:
+    case 0xf2:
+    case 0xf3:
+      op_idx++;
+      break;
+
+    // prefix_rex (skipped only in 64-bit mode; otherwise treated as the opcode)
+    case 0x40:
+    case 0x41:
+    case 0x42:
+    case 0x43:
+    case 0x44:
+    case 0x45:
+    case 0x46:
+    case 0x47:
+    case 0x48:
+    case 0x49:
+    case 0x4a:
+    case 0x4b:
+    case 0x4c:
+    case 0x4d:
+    case 0x4e:
+    case 0x4f:
+      if (is_exec_mode_64b)
+        op_idx++;
+      else
+        prefix_done = true;
+      break;
+
+    // prefix_vex_c4, c5 (NOTE(review): op_idx + N reads skip the bytes_len check)
+    case 0xc5:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true; // not a VEX prefix here; decode 0xc5 as the opcode
+        break;
+      }
+
+      *opcode_len = 2;
+      *primary_opcode = inst_bytes[op_idx + 2];
+      *modrm = inst_bytes[op_idx + 3];
+      return true;
+
+    case 0xc4:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true; // not a VEX prefix here; decode 0xc4 as the opcode
+        break;
+      }
+      *opcode_len = inst_bytes[op_idx + 1] & 0x1f; // low 5 bits of the 2nd byte
+      *primary_opcode = inst_bytes[op_idx + 3];
+      *modrm = inst_bytes[op_idx + 4];
+      return true;
+
+    // prefix_evex (NOTE(review): op_idx + N reads skip the bytes_len check)
+    case 0x62:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true; // not an EVEX prefix here; decode 0x62 as the opcode
+        break;
+      }
+      *opcode_len = inst_bytes[op_idx + 1] & 0x03; // low 2 bits of the 2nd byte
+      *primary_opcode = inst_bytes[op_idx + 4];
+      *modrm = inst_bytes[op_idx + 5];
+      return true;
+
+    default:
+      prefix_done = true;
+      break;
+    }
+  } // prefix done; op_idx < bytes_len holds here, op_idx + 1 is unchecked
+
+  *primary_opcode = inst_bytes[op_idx];
+  *modrm = inst_bytes[op_idx + 1]; // NOTE(review): may read inst_bytes[bytes_len]
+  *opcode_len = 1;
+
+  // If the first opcode byte is 0x0F, this is a two- or three-byte opcode.
+  if (*primary_opcode == 0x0F) {
+    *primary_opcode = inst_bytes[++op_idx]; // get the next byte
+
+    if (*primary_opcode == 0x38) {
+      *opcode_len = 3;
+      *primary_opcode = inst_bytes[++op_idx]; // get the next byte
+      *modrm = inst_bytes[op_idx + 1];
+    } else if (*primary_opcode == 0x3A) {
+      *opcode_len = 3;
+      *primary_opcode = inst_bytes[++op_idx];
+      *modrm = inst_bytes[op_idx + 1];
+    } else if ((*primary_opcode & 0xf8) == 0x38) { // remaining 0x39..0x3F bytes
+      *opcode_len = 0;
+      *primary_opcode = inst_bytes[++op_idx];
+      *modrm = inst_bytes[op_idx + 1];
+    } else if (*primary_opcode == 0x0F) {
+      *opcode_len = 3;
+      // primary_opcode is already 0x0F; no need to update it
+      *modrm = inst_bytes[op_idx + 1];
+    } else {
+      *opcode_len = 2;
+      *modrm = inst_bytes[op_idx + 1];
+    }
+  }
+
+  return true;
+}
+
+lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
+                                                    Opcode m_opcode) {
+  uint8_t primary_opcode = 0, modrm = 0, opcode_len = 0;
+  bool ret; // result of InstructionLengthDecode()
+
+  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
+    // Nothing to decode: x86_64/i386 opcodes are expected to be eTypeBytes.
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
+
+  // Decode the opcode bytes into primary_opcode, modrm and opcode_len; these
+  // three values alone decide the instruction control flow kind.
+  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
+                                is_exec_mode_64b, m_opcode.GetByteSize(),
+                                &primary_opcode, &modrm, &opcode_len);
+  if (!ret) // decoding failed, so the kind cannot be determined
+    return lldb::eInstructionControlFlowKindUnknown;
+
+  return MapOpcodeIntoControlFlowKind(primary_opcode, modrm, opcode_len);
+}
+
+} // namespace x86
+
+lldb::InstructionControlFlowKind
+Instruction::GetControlFlowKind(const ArchSpec &arch) {
+  if (arch.GetTriple().getArch() == llvm::Triple::x86) // 32-bit decode rules
+    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
+  else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
+    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
+  else
+    return eInstructionControlFlowKindUnknown; // other archs: not implemented
+}
+
 AddressClass Instruction::GetAddressClass() {
   if (m_address_class == AddressClass::eInvalid)
     m_address_class = m_address.GetAddressClass();
@@ -576,6 +901,7 @@
 
 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
                        bool show_address, bool show_bytes,
+                       bool show_control_flow_kind,
                        const ExecutionContext *exe_ctx,
                        const SymbolContext *sym_ctx,
                        const SymbolContext *prev_sym_ctx,
@@ -613,6 +939,38 @@
     }
   }
 
+  if (show_control_flow_kind) {
+    switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) {
+    case eInstructionControlFlowKindUnknown:
+      ss.Printf("%-12s", "unknown");
+      break;
+    case eInstructionControlFlowKindOther:
+      ss.Printf("%-12s", "other");
+      break;
+    case eInstructionControlFlowKindCall:
+      ss.Printf("%-12s", "call");
+      break;
+    case eInstructionControlFlowKindReturn:
+      ss.Printf("%-12s", "return");
+      break;
+    case eInstructionControlFlowKindJump:
+      ss.Printf("%-12s", "jump");
+      break;
+    case eInstructionControlFlowKindCondJump:
+      ss.Printf("%-12s", "cond jump");
+      break;
+    case eInstructionControlFlowKindFarCall:
+      ss.Printf("%-12s", "far call");
+      break;
+    case eInstructionControlFlowKindFarReturn:
+      ss.Printf("%-12s", "far return");
+      break;
+    case eInstructionControlFlowKindFarJump:
+      ss.Printf("%-12s", "far jump");
+      break;
+    }
+  }
+
   const size_t opcode_pos = ss.GetSizeOfLastLine();
 
   // The default opcode size of 7 characters is plenty for most architectures
@@ -957,6 +1315,7 @@
 }
 
 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
+                           bool show_control_flow_kind,
                            const ExecutionContext *exe_ctx) {
   const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
   collection::const_iterator pos, begin, end;
@@ -975,8 +1334,9 @@
        pos != end; ++pos) {
     if (pos != begin)
       s->EOL();
-    (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
-                 nullptr, nullptr, disassembly_format, 0);
+    (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes,
+                 show_control_flow_kind, exe_ctx, nullptr, nullptr,
+                 disassembly_format, 0);
   }
 }
 
@@ -994,7 +1354,7 @@
   size_t num_instructions = m_instructions.size();
 
   uint32_t next_branch = UINT32_MAX;
-  
+
   if (found_calls)
     *found_calls = false;
   for (size_t i = start; i < num_instructions; i++) {
Index: lldb/source/Commands/Options.td
===================================================================
--- lldb/source/Commands/Options.td
+++ lldb/source/Commands/Options.td
@@ -300,6 +300,11 @@
 let Command = "disassemble" in {
   def disassemble_options_bytes : Option<"bytes", "b">,
     Desc<"Show opcode bytes when disassembling.">;
+  def disassemble_options_kind : Option<"kind", "k">,
+    Desc<"Show instruction control flow kind. Refer enum "
+    "`InstructionControlFlowKind` for a list of control flow kind. "
+    "As an important note, far jumps, far calls and far returns often indicate "
+    "calls to and from kernel.">;
   def disassemble_options_context : Option<"context", "C">, Arg<"NumLines">,
     Desc<"Number of context lines of source to show.">;
   def disassemble_options_mixed : Option<"mixed", "m">,
@@ -1150,6 +1155,11 @@
   def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">,
     Group<1>,
     Desc<"Dump in JSON format but pretty printing the output for easier readability.">;
+  def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>,
+    Desc<"Show instruction control flow kind. Refer enum "
+    "`InstructionControlFlowKind` for a list of control flow kind. "
+    "As an important note, far jumps, far calls and far returns often indicate "
+    "calls to and from kernel.">;
   def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>,
     Desc<"For each instruction, print the corresponding timestamp counter if "
     "available.">;
Index: lldb/source/Commands/CommandObjectThread.cpp
===================================================================
--- lldb/source/Commands/CommandObjectThread.cpp
+++ lldb/source/Commands/CommandObjectThread.cpp
@@ -2164,6 +2164,10 @@
         m_dumper_options.forwards = true;
         break;
       }
+      case 'k': {
+        m_dumper_options.show_control_flow_kind = true;
+        break;
+      }
       case 't': {
         m_dumper_options.show_tsc = true;
         break;
Index: lldb/source/Commands/CommandObjectDisassemble.h
===================================================================
--- lldb/source/Commands/CommandObjectDisassemble.h
+++ lldb/source/Commands/CommandObjectDisassemble.h
@@ -46,6 +46,7 @@
 
     bool show_mixed; // Show mixed source/assembly
     bool show_bytes;
+    bool show_control_flow_kind;
     uint32_t num_lines_context = 0;
     uint32_t num_instructions = 0;
     bool raw;
Index: lldb/source/Commands/CommandObjectDisassemble.cpp
===================================================================
--- lldb/source/Commands/CommandObjectDisassemble.cpp
+++ lldb/source/Commands/CommandObjectDisassemble.cpp
@@ -65,6 +65,10 @@
     show_bytes = true;
     break;
 
+  case 'k':
+    show_control_flow_kind = true;
+    break;
+
   case 's': {
     start_addr = OptionArgParser::ToAddress(execution_context, option_arg,
                                             LLDB_INVALID_ADDRESS, &error);
@@ -154,6 +158,7 @@
     ExecutionContext *execution_context) {
   show_mixed = false;
   show_bytes = false;
+  show_control_flow_kind = false;
   num_lines_context = 0;
   num_instructions = 0;
   func_name.clear();
@@ -493,6 +498,9 @@
   if (m_options.show_bytes)
     options |= Disassembler::eOptionShowBytes;
 
+  if (m_options.show_control_flow_kind)
+    options |= Disassembler::eOptionShowControlFlowKind;
+
   if (m_options.raw)
     options |= Disassembler::eOptionRawOuput;
 
Index: lldb/source/API/SBInstructionList.cpp
===================================================================
--- lldb/source/API/SBInstructionList.cpp
+++ lldb/source/API/SBInstructionList.cpp
@@ -165,8 +165,9 @@
               addr, eSymbolContextEverything, sc);
         }
 
-        inst->Dump(&sref, max_opcode_byte_size, true, false, nullptr, &sc,
-                   &prev_sc, &format, 0);
+        inst->Dump(&sref, max_opcode_byte_size, true, false,
+                   /*show_control_flow_kind=*/false, nullptr, &sc, &prev_sc,
+                   &format, 0);
         sref.EOL();
       }
       return true;
Index: lldb/source/API/SBInstruction.cpp
===================================================================
--- lldb/source/API/SBInstruction.cpp
+++ lldb/source/API/SBInstruction.cpp
@@ -241,7 +241,8 @@
     // didn't have a stream already created, one will get created...
     FormatEntity::Entry format;
     FormatEntity::Parse("${addr}: ", format);
-    inst_sp->Dump(&s.ref(), 0, true, false, nullptr, &sc, nullptr, &format, 0);
+    inst_sp->Dump(&s.ref(), 0, true, false, /*show_control_flow_kind=*/false,
+                  nullptr, &sc, nullptr, &format, 0);
     return true;
   }
   return false;
@@ -275,8 +276,8 @@
     StreamFile out_stream(out_sp);
     FormatEntity::Entry format;
     FormatEntity::Parse("${addr}: ", format);
-    inst_sp->Dump(&out_stream, 0, true, false, nullptr, &sc, nullptr, &format,
-                  0);
+    inst_sp->Dump(&out_stream, 0, true, false, /*show_control_flow_kind=*/false,
+                  nullptr, &sc, nullptr, &format, 0);
   }
 }
 
Index: lldb/include/lldb/lldb-enumerations.h
===================================================================
--- lldb/include/lldb/lldb-enumerations.h
+++ lldb/include/lldb/lldb-enumerations.h
@@ -970,20 +970,30 @@
 /// control flow of a trace.
 ///
 /// A single instruction can match one or more of these categories.
-FLAGS_ENUM(TraceInstructionControlFlowType){
-    /// Any instruction.
-    eTraceInstructionControlFlowTypeInstruction = (1u << 1),
-    /// A conditional or unconditional branch/jump.
-    eTraceInstructionControlFlowTypeBranch = (1u << 2),
-    /// A conditional or unconditional branch/jump that changed
-    /// the control flow of the program.
-    eTraceInstructionControlFlowTypeTakenBranch = (1u << 3),
-    /// A call to a function.
-    eTraceInstructionControlFlowTypeCall = (1u << 4),
-    /// A return from a function.
-    eTraceInstructionControlFlowTypeReturn = (1u << 5)};
-
-LLDB_MARK_AS_BITMASK_ENUM(TraceInstructionControlFlowType)
+enum InstructionControlFlowKind {
+  /// The instruction could not be classified.
+  eInstructionControlFlowKindUnknown = 0,
+  /// The instruction is something not listed below, i.e. it's a sequential
+  /// instruction that doesn't affect the control flow of the program.
+  eInstructionControlFlowKindOther,
+  /// The instruction is a near (function) call.
+  eInstructionControlFlowKindCall,
+  /// The instruction is a near (function) return.
+  eInstructionControlFlowKindReturn,
+  /// The instruction is a near unconditional jump.
+  eInstructionControlFlowKindJump,
+  /// The instruction is a near conditional jump.
+  eInstructionControlFlowKindCondJump,
+  /// The instruction is a call-like far transfer.
+  /// E.g. SYSCALL, SYSENTER, or FAR CALL.
+  eInstructionControlFlowKindFarCall,
+  /// The instruction is a return-like far transfer.
+  /// E.g. SYSRET, SYSEXIT, IRET, or FAR RET.
+  eInstructionControlFlowKindFarReturn,
+  /// The instruction is a jump-like far transfer.
+  /// E.g. FAR JMP.
+  eInstructionControlFlowKindFarJump
+};
 
 /// Watchpoint Kind.
 ///
Index: lldb/include/lldb/Target/TraceDumper.h
===================================================================
--- lldb/include/lldb/Target/TraceDumper.h
+++ lldb/include/lldb/Target/TraceDumper.h
@@ -34,6 +34,8 @@
   bool show_tsc = false;
   /// Dump the events that happened between instructions.
   bool show_events = false;
+  /// For each instruction, print its control flow kind.
+  bool show_control_flow_kind = false;
   /// Optional custom id to start traversing from.
   llvm::Optional<uint64_t> id = llvm::None;
   /// Optional number of instructions to skip from the starting position
Index: lldb/include/lldb/Core/Disassembler.h
===================================================================
--- lldb/include/lldb/Core/Disassembler.h
+++ lldb/include/lldb/Core/Disassembler.h
@@ -79,6 +79,12 @@
     return m_comment.c_str();
   }
 
+  /// \return
+  ///    The control flow kind of this instruction, or
+  ///    eInstructionControlFlowKindUnknown if the instruction
+  ///    can't be classified.
+  lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch);
+
   virtual void
   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
 
@@ -105,6 +111,9 @@
   /// \param[in] show_bytes
   ///     Whether the bytes of the assembly instruction should be printed.
   ///
+  /// \param[in] show_control_flow_kind
+  ///     Whether the control flow kind of the instruction should be printed.
+  ///
   /// \param[in] max_opcode_byte_size
   ///     The size (in bytes) of the largest instruction in the list that
   ///     we are printing (for text justification/alignment purposes)
@@ -140,7 +149,8 @@
   ///     so this method can properly align the instruction opcodes.
   ///     May be 0 to indicate no indentation/alignment of the opcodes.
   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
-                    bool show_bytes, const ExecutionContext *exe_ctx,
+                    bool show_bytes, bool show_control_flow_kind,
+                    const ExecutionContext *exe_ctx,
                     const SymbolContext *sym_ctx,
                     const SymbolContext *prev_sym_ctx,
                     const FormatEntity::Entry *disassembly_addr_format,
@@ -320,7 +330,7 @@
   void Append(lldb::InstructionSP &inst_sp);
 
   void Dump(Stream *s, bool show_address, bool show_bytes,
-            const ExecutionContext *exe_ctx);
+            bool show_control_flow_kind, const ExecutionContext *exe_ctx);
 
 private:
   typedef std::vector<lldb::InstructionSP> collection;
@@ -375,7 +385,8 @@
     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
                                          // the current PC (mixed mode only)
     eOptionMarkPCAddress =
-        (1u << 3) // Mark the disassembly line the contains the PC
+        (1u << 3), // Mark the disassembly line that contains the PC
+    eOptionShowControlFlowKind = (1u << 4),
   };
 
   enum HexImmediateStyle {
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to