Filed RFE:

https://bugs.openjdk.java.net/browse/JDK-8068945

Regards,
Vladimir

On 12/9/14 2:14 AM, Erik Helin wrote:
I should also add that I don't have enough knowledge of the compiler
internals to review this patch, sorry.

Thanks,
Erik

On 2014-12-09 10:53, Erik Helin wrote:
I applied the patch on top of jdk9/hs-comp and created a webrev:
http://cr.openjdk.java.net/~ehelin/brendan/frame-pointer/webrev/

I also successfully ran the patch through JPRT.

Thanks,
Erik

On 2014-12-05 20:57, Brendan Gregg wrote:


On Thu, Dec 4, 2014 at 2:55 PM, Brendan Gregg <brendan.d.gr...@gmail.com
<mailto:brendan.d.gr...@gmail.com>> wrote:

    G'Day,

    I've hacked hotspot to return the frame pointer, in part to see what
    this involves, and also to have a working prototype for analysis.
    Along with an agent to resolve symbols, this has allowed full stack
    profiling using Linux perf_events. The following flame graphs show
    the resulting profiles.

    A mixed mode CPU flame graph of a vert.x benchmark (click to zoom):

    http://www.brendangregg.com/FlameGraphs/cpu-mixedmode-vertx.svg

    Same thing, but this time disabling inlining, to show more frames:

    http://www.brendangregg.com/FlameGraphs/cpu-mixedmode-flamegraph.svg

    As expected, performance is worse without inlining. You can compare
    the flame graphs side by side to see why. Less time spent doing work
    / I/O!


https://github.com/brendangregg/Misc/blob/master/java/openjdk8_b132-fp.diff


    is my patch,

    [...]


In case there are problems with the patch URL, the patch is:

--- openjdk8clean/hotspot/src/cpu/x86/vm/x86_64.ad  2014-03-04 02:52:11.000000000 +0000
+++ openjdk8/hotspot/src/cpu/x86/vm/x86_64.ad  2014-11-08 01:10:49.686044933 +0000
@@ -166,10 +166,9 @@
  // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  //

-// Class for all pointer registers (including RSP)
+// Class for all pointer registers (including RSP, excluding RBP)
  reg_class any_reg(RAX, RAX_H,
                    RDX, RDX_H,
-                  RBP, RBP_H,
                    RDI, RDI_H,
                    RSI, RSI_H,
                    RCX, RCX_H,
@@ -184,10 +183,9 @@
                    R14, R14_H,
                    R15, R15_H);

-// Class for all pointer registers except RSP
+// Class for all pointer registers except RSP and RBP
  reg_class ptr_reg(RAX, RAX_H,
                    RDX, RDX_H,
-                  RBP, RBP_H,
                    RDI, RDI_H,
                    RSI, RSI_H,
                    RCX, RCX_H,
@@ -199,9 +197,8 @@
                    R13, R13_H,
                    R14, R14_H);

-// Class for all pointer registers except RAX and RSP
+// Class for all pointer registers except RAX, RSP and RBP
  reg_class ptr_no_rax_reg(RDX, RDX_H,
-                         RBP, RBP_H,
                           RDI, RDI_H,
                           RSI, RSI_H,
                           RCX, RCX_H,
@@ -226,9 +223,8 @@
                           R13, R13_H,
                           R14, R14_H);

-// Class for all pointer registers except RAX, RBX and RSP
+// Class for all pointer registers except RAX, RBX, RSP and RBP
  reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
-                             RBP, RBP_H,
                               RDI, RDI_H,
                               RSI, RSI_H,
                               RCX, RCX_H,
@@ -260,10 +256,9 @@
  // Singleton class for TLS pointer
  reg_class ptr_r15_reg(R15, R15_H);

-// Class for all long registers (except RSP)
+// Class for all long registers (except RSP and RBP)
  reg_class long_reg(RAX, RAX_H,
                     RDX, RDX_H,
-                   RBP, RBP_H,
                     RDI, RDI_H,
                     RSI, RSI_H,
                     RCX, RCX_H,
@@ -275,9 +270,8 @@
                     R13, R13_H,
                     R14, R14_H);

-// Class for all long registers except RAX, RDX (and RSP)
-reg_class long_no_rax_rdx_reg(RBP, RBP_H,
-                              RDI, RDI_H,
+// Class for all long registers except RAX, RDX (and RSP, RBP)
+reg_class long_no_rax_rdx_reg(RDI, RDI_H,
                                RSI, RSI_H,
                                RCX, RCX_H,
                                RBX, RBX_H,
@@ -288,9 +282,8 @@
                                R13, R13_H,
                                R14, R14_H);

-// Class for all long registers except RCX (and RSP)
-reg_class long_no_rcx_reg(RBP, RBP_H,
-                          RDI, RDI_H,
+// Class for all long registers except RCX (and RSP, RBP)
+reg_class long_no_rcx_reg(RDI, RDI_H,
                            RSI, RSI_H,
                            RAX, RAX_H,
                            RDX, RDX_H,
@@ -302,9 +295,8 @@
                            R13, R13_H,
                            R14, R14_H);

-// Class for all long registers except RAX (and RSP)
-reg_class long_no_rax_reg(RBP, RBP_H,
-                          RDX, RDX_H,
+// Class for all long registers except RAX (and RSP, RBP)
+reg_class long_no_rax_reg(RDX, RDX_H,
                            RDI, RDI_H,
                            RSI, RSI_H,
                            RCX, RCX_H,
@@ -325,10 +317,9 @@
  // Singleton class for RDX long register
  reg_class long_rdx_reg(RDX, RDX_H);

-// Class for all int registers (except RSP)
+// Class for all int registers (except RSP and RBP)
  reg_class int_reg(RAX,
                    RDX,
-                  RBP,
                    RDI,
                    RSI,
                    RCX,
@@ -340,10 +331,9 @@
                    R13,
                    R14);

-// Class for all int registers except RCX (and RSP)
+// Class for all int registers except RCX (and RSP, RBP)
  reg_class int_no_rcx_reg(RAX,
                           RDX,
-                         RBP,
                           RDI,
                           RSI,
                           RBX,
@@ -355,8 +345,7 @@
                           R14);

  // Class for all int registers except RAX, RDX (and RSP)
-reg_class int_no_rax_rdx_reg(RBP,
-                             RDI,
+reg_class int_no_rax_rdx_reg(RDI,
                               RSI,
                               RCX,
                               RBX,
@@ -718,6 +707,7 @@
      st->print("# stack bang");
      st->print("\n\t");
      st->print("pushq   rbp\t# Save rbp");
+    // BDG consider: st->print("movq    rbp, rsp\t# ");
      if (framesize) {
        st->print("\n\t");
        st->print("subq    rsp, #%d\t# Create frame",framesize);
--- openjdk8clean/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp  2014-03-04 02:52:11.000000000 +0000
+++ openjdk8/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp  2014-11-07 23:57:11.589593723 +0000
@@ -5236,6 +5236,7 @@
      // We always push rbp, so that on return to interpreter rbp, will be
      // restored correctly and we can correct the stack.
      push(rbp);
+    mov(rbp, rsp);
      // Remove word for ebp
      framesize -= wordSize;

--- openjdk8clean/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp  2014-03-04 02:52:10.000000000 +0000
+++ openjdk8/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp  2014-11-07 23:57:21.933257882 +0000
@@ -358,6 +358,7 @@
    generate_stack_overflow_check(frame_size_in_bytes);

    push(rbp);
+  mov(rbp, rsp);
  #ifdef TIERED
    // c2 leaves fpu stack dirty. Clean it on entry
    if (UseSSE < 2 ) {


Brendan

Reply via email to