Sorry, wrong version of patch...
On Fri, Jul 25, 2014 at 1:41 PM, Tong Shen <[email protected]> wrote:
> Hi Molenda, lldb-commits,
>
> For now, x86 assembly profiler will stop after 10 "non-prologue"
> instructions. In practice it may not be sufficient. For example, we have a
> hand-written assembly function, which have hundreds of instruction before
> actual (stack-adjusting) prologue instructions.
>
> One way is to change the limit to 1000; but there will always be functions
> that break the limit :-) I believe the right thing to do here is parsing
> all instructions before "ret"/"call" as prologue instructions.
>
> Here's what I changed:
> - For "push %rbx" and "mov %rbx, -8(%rbp)": only add first row for that
> register. They may appear multiple times in function body. But as long as
> one of them appears, first appearance should be in prologue(If it's not in
> prologue, this function will not use %rbx, so these 2 instructions should
> not appear at all).
> - Also monitor "add %rsp 0x20".
> - Remove non prologue instruction count.
> - Add "call" instruction detection, and stop parsing after it.
>
> Thanks.
>
> --
> Best Regards, Tong Shen
>
--
Best Regards, Tong Shen
Index: source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
===================================================================
--- source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp (revision 213949)
+++ source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp (working copy)
@@ -134,10 +134,11 @@
bool push_rbp_pattern_p ();
bool push_0_pattern_p ();
bool mov_rsp_rbp_pattern_p ();
- bool sub_rsp_pattern_p (int& amount);
+ bool sub_or_add_rsp_pattern_p (int& amount);
bool push_reg_p (int& regno);
bool mov_reg_to_local_stack_frame_p (int& regno, int& fp_offset);
bool ret_pattern_p ();
+ bool call_pattern_p ();
uint32_t extract_4 (uint8_t *b);
bool machine_regno_to_lldb_regno (int machine_regno, uint32_t& lldb_regno);
bool instruction_length (Address addr, int &length);
@@ -332,22 +333,33 @@
}
// subq $0x20, %rsp
-bool AssemblyParse_x86::sub_rsp_pattern_p (int& amount) {
+// addq $0x20, $rsp
+bool AssemblyParse_x86::sub_or_add_rsp_pattern_p (int& amount) {
uint8_t *p = m_cur_insn_bytes;
if (m_wordsize == 8 && *p == 0x48)
p++;
- // 8-bit immediate operand
+ // 8-bit immediate operand, sub
if (*p == 0x83 && *(p + 1) == 0xec) {
amount = (int8_t) *(p + 2);
return true;
}
- // 32-bit immediate operand
+ // 8-bit immediate operand, add
+ if (*p == 0x83 && *(p + 1) == 0xc4) {
+ amount = (int8_t) *(p + 2);
+ amount = -amount;
+ return true;
+ }
+ // 32-bit immediate operand, sub
if (*p == 0x81 && *(p + 1) == 0xec) {
amount = (int32_t) extract_4 (p + 2);
return true;
}
- // Not handled: [0x83 0xc4] for imm8 with neg values
- // [0x81 0xc4] for imm32 with neg values
+ // 32-bit immediate operand, add
+ if (*p == 0x81 && *(p + 1) == 0xc4) {
+ amount = (int32_t) extract_4 (p + 2);
+ amount = -amount;
+ return true;
+ }
return false;
}
@@ -436,6 +448,21 @@
return false;
}
+// call 0xdeadbeaf [0xe8 0xaf 0xbe 0xad 0xde]
+// call *%rax [0xff 0xd0]
+// call *%r8 [0x41 0xff 0xd0]
+bool
+AssemblyParse_x86::call_pattern_p () {
+ uint8_t *p = m_cur_insn_bytes;
+ if (*p == 0xe8)
+ return true;
+ if (*p == 0xff && ((*(p+1) & 0x38) == 0x10))
+ return true;
+ if (*p == 0x41 && *(p+1) == 0xff && ((*(p+2) & 0x38) == 0x10))
+ return true;
+ return false;
+}
+
uint32_t
AssemblyParse_x86::extract_4 (uint8_t *b)
{
@@ -509,10 +536,10 @@
AssemblyParse_x86::get_non_call_site_unwind_plan (UnwindPlan &unwind_plan)
{
UnwindPlan::RowSP row(new UnwindPlan::Row);
- int non_prologue_insn_count = 0;
m_cur_insn = m_func_bounds.GetBaseAddress ();
int current_func_text_offset = 0;
int current_sp_bytes_offset_from_cfa = 0;
+ uint32_t reg_bitmap = 0;
UnwindPlan::Row::RegisterLocation initial_regloc;
Error error;
@@ -548,7 +575,7 @@
const bool prefer_file_cache = true;
Target *target = m_exe_ctx.GetTargetPtr();
- while (m_func_bounds.ContainsFileAddress (m_cur_insn) && non_prologue_insn_count < 10)
+ while (m_func_bounds.ContainsFileAddress (m_cur_insn))
{
int stack_offset, insn_len;
int machine_regno; // register numbers masked directly out of instructions
@@ -566,8 +593,9 @@
break;
}
- if (push_rbp_pattern_p ())
+ if (push_rbp_pattern_p () && !(reg_bitmap & (1 << m_lldb_fp_regnum)))
{
+ reg_bitmap |= (1 << m_lldb_fp_regnum);
row->SetOffset (current_func_text_offset + insn_len);
current_sp_bytes_offset_from_cfa += m_wordsize;
row->SetCFAOffset (current_sp_bytes_offset_from_cfa);
@@ -614,8 +642,10 @@
row->SetCFAOffset (current_sp_bytes_offset_from_cfa);
}
// record where non-volatile (callee-saved, spilled) registers are saved on the stack
- if (nonvolatile_reg_p (machine_regno) && machine_regno_to_lldb_regno (machine_regno, lldb_regno))
+ if (nonvolatile_reg_p (machine_regno) && machine_regno_to_lldb_regno (machine_regno, lldb_regno)
+ && !(reg_bitmap & (1 << lldb_regno)))
{
+ reg_bitmap |= (1 << lldb_regno);
need_to_push_row = true;
UnwindPlan::Row::RegisterLocation regloc;
regloc.SetAtCFAPlusOffset (-current_sp_bytes_offset_from_cfa);
@@ -633,10 +663,13 @@
goto loopnext;
}
- if (mov_reg_to_local_stack_frame_p (machine_regno, stack_offset) && nonvolatile_reg_p (machine_regno))
+ if ((row->GetCFARegister() == static_cast<uint32_t>(m_lldb_fp_regnum))
+ && mov_reg_to_local_stack_frame_p (machine_regno, stack_offset)
+ && nonvolatile_reg_p (machine_regno))
{
- if (machine_regno_to_lldb_regno (machine_regno, lldb_regno))
+ if (machine_regno_to_lldb_regno (machine_regno, lldb_regno) && !(reg_bitmap & (1 << lldb_regno)))
{
+ reg_bitmap |= (1 << lldb_regno);
row->SetOffset (current_func_text_offset + insn_len);
UnwindPlan::Row::RegisterLocation regloc;
@@ -656,7 +689,7 @@
}
}
- if (sub_rsp_pattern_p (stack_offset))
+ if (sub_or_add_rsp_pattern_p (stack_offset))
{
current_sp_bytes_offset_from_cfa += stack_offset;
if (row->GetCFARegister() == static_cast<uint32_t>(m_lldb_sp_regnum))
@@ -672,7 +705,7 @@
goto loopnext;
}
- if (ret_pattern_p ())
+ if (ret_pattern_p () || call_pattern_p ())
{
// we know where the end of the function is; set the limit on the PlanValidAddressRange
// in case our initial "high pc" value was overly large
@@ -688,7 +721,6 @@
// 0x1f1b: popl %eax
// and record the temporary stack movements if the CFA is not expressed in terms of ebp.
- non_prologue_insn_count++;
loopnext:
m_cur_insn.SetOffset (m_cur_insn.GetOffset() + insn_len);
current_func_text_offset += insn_len;
@@ -883,7 +915,7 @@
break;
}
- if (push_rbp_pattern_p () || mov_rsp_rbp_pattern_p () || sub_rsp_pattern_p (offset)
+ if (push_rbp_pattern_p () || mov_rsp_rbp_pattern_p () || sub_or_add_rsp_pattern_p (offset)
|| push_reg_p (regno) || mov_reg_to_local_stack_frame_p (regno, offset))
{
m_cur_insn.SetOffset (m_cur_insn.GetOffset() + insn_len);
_______________________________________________
lldb-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits