People are starting to capture kernel stack traces and produce
Flamegraphs.  Those traces currently include the frames used by
dt(4) itself:

    dt_pcb_ring_get+0x11d
    dt_prov_profile_enter+0x6e
    hardclock+0x1a9
    lapic_clockintr+0x3f
    Xresume_lapic_ltimer+0x26
    acpicpu_idle+0x261          <--- CPU was Idle
    sched_idle+0x225
    proc_trampoline+0x1c
    1

Saving the last 5 frames in the example above consumes CPU time and
require some post-processing to cleanup gathered data.  So the diff
below extends the existing stacktrace_save() into and *_at() version
that takes an arbitrary frame as argument.

The fun start when we ask "How many frames do we skip?".  There are
currently two different contexts in which dt(4) can gather data:
 - in hardlock(9), which mean a specialized interrupt context
 - in any thread or interrupt context

Those two contexts require a different number of frames to reach the
recording function: dt_pcb_ring_get().  Now what's fun is that every
architecture has its own way to reach hardclock(9) (possibly multiple
ones), has its own stack unwinder (with specific bugs) and might use
different compilers with different optimizations (yeah!).

So the diff below introduce two per-arch defines that specify at which
frame the unwinding should start.  This has been tested on sparc64 and
amd64 and here's the result:

    # btrace -e 'tracepoint:sched:sleep { printf("%s1\n", kstack); }'
    sleep_setup+0x16c
    msleep+0x160
    taskq_next_work+0x5c
    taskq_thread+0x38
    0x1013074
    1

Note that the number of frames for hardclock(9) on amd64 assumes lapic
is used.  That's why on vmm(4) you'll still see an extra frame as below:

  #  btrace -e 'profile:hz:99 { printf("%s1\n", kstack); }'          
  Xresume_legacy0+0x1d3
  cpu_idle_cycle+0x1b
  proc_trampoline+0x1c
  1

Whereas on real hardware:

  # btrace -e 'profile:hz:99 { printf("%s1\n", kstack); }' 
  acpicpu_idle+0x1d2
  sched_idle+0x225
  proc_trampoline+0x1c
  1

The diff below will break compiling dt(4) on the other archs because
they don't yet provide stacktrace_save_at(), but it shouldn't be hard
to fix ;)

Ok?

Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.4
diff -u -p -r1.4 dt_dev.c
--- dev/dt/dt_dev.c     4 Feb 2020 10:56:15 -0000       1.4
+++ dev/dt/dt_dev.c     19 Mar 2020 14:08:53 -0000
@@ -26,29 +26,40 @@
 #include <dev/dt/dtvar.h>
 
 /*
- * How many frames are used by the profiling code?  For example
- * on amd64:
+ * The number of frames required to execute dt(4) profiling code
+ * depends on the probe, context, architecture and possibly the
+ * compiler.
  *
- * From syscall provider:
+ * Static probes (tracepoints) are executed in the context of the
+ * current thread and only need to skip frames up to the recording
+ *function.  For example the syscall provider:
  *
  *     dt_prov_syscall_entry+0x141
  *     syscall+0x205           <--- start here
  *     Xsyscall+0x128
  *
- * From profile provider:
+ * Probes executed in their own context, like the profile provider,
+ * need to skip the frames of that context which are different for
+ * every architecture.  For example the profile provider executed
+ * from hardclock(9) on amd64:
  *
  *     dt_prov_profile_enter+0x6e
- *     hardclock+0x12c
- *     clockintr+0x59
- *     intr_handler+0x6e
- *     Xresume_legacy0+0x1d3
- *     cpu_idle_cycle+0x1b     <---- start here.
+ *     hardclock+0x1a9
+ *     lapic_clockintr+0x3f
+ *     Xresume_lapic_ltimer+0x26
+ *     acpicpu_idle+0x1d2      <---- start here.
+ *     sched_idle+0x225
  *     proc_trampoline+0x1c
  */
-#if notyet
-#define DT_HOOK_FRAME_ADDRESS  __builtin_frame_address(4)
+#if defined(__amd64__)
+#define DT_FA_PROFILE  __builtin_frame_address(4)
+#define DT_FA_STATIC   __builtin_frame_address(1)
+#elif defined(__sparc64__)
+#define DT_FA_PROFILE  __builtin_frame_address(4)
+#define DT_FA_STATIC   __builtin_frame_address(0)
 #else
-#define DT_HOOK_FRAME_ADDRESS  __builtin_frame_address(0)
+#define DT_FA_STATIC   __builtin_frame_address(0)
+#define DT_FA_PROFILE  DT_FA_STATIC
 #endif
 
 #define DT_EVTRING_SIZE        16      /* # of slots in per PCB event ring */
@@ -583,7 +594,7 @@ dt_pcb_filter(struct dt_pcb *dp)
  * Get a reference to the next free event state from the ring.
  */
 struct dt_evt *
-dt_pcb_ring_get(struct dt_pcb *dp)
+dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
 {
        struct proc *p = curproc;
        struct dt_evt *dtev;
@@ -617,11 +628,10 @@ dt_pcb_ring_get(struct dt_pcb *dp)
                memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);
 
        if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
-#if notyet
-               stacktrace_save_at(&dtev->dtev_kstack, DT_HOOK_FRAME_ADDRESS);
-#else
-               stacktrace_save(&dtev->dtev_kstack);
-#endif
+               if (profiling)
+                       stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
+               else
+                       stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
        }
 
        return dtev;
Index: dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
retrieving revision 1.1
diff -u -p -r1.1 dt_prov_profile.c
--- dev/dt/dt_prov_profile.c    21 Jan 2020 16:16:23 -0000      1.1
+++ dev/dt/dt_prov_profile.c    19 Mar 2020 14:06:01 -0000
@@ -107,7 +107,7 @@ dt_prov_profile_fire(struct dt_pcb *dp)
        if (++dp->dp_nticks < dp->dp_maxtick)
                return;
 
-       dtev = dt_pcb_ring_get(dp);
+       dtev = dt_pcb_ring_get(dp, 1);
        if (dtev == NULL)
                return;
        dt_pcb_ring_consume(dp, dtev);
Index: dev/dt/dt_prov_static.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_static.c,v
retrieving revision 1.1
diff -u -p -r1.1 dt_prov_static.c
--- dev/dt/dt_prov_static.c     21 Jan 2020 16:16:23 -0000      1.1
+++ dev/dt/dt_prov_static.c     19 Mar 2020 14:06:01 -0000
@@ -120,7 +120,7 @@ dt_prov_static_hook(struct dt_provider *
        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
                struct dt_evt *dtev;
 
-               dtev = dt_pcb_ring_get(dp);
+               dtev = dt_pcb_ring_get(dp, 0);
                if (dtev == NULL)
                        continue;
 
Index: dev/dt/dt_prov_syscall.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_syscall.c,v
retrieving revision 1.1
diff -u -p -r1.1 dt_prov_syscall.c
--- dev/dt/dt_prov_syscall.c    21 Jan 2020 16:16:23 -0000      1.1
+++ dev/dt/dt_prov_syscall.c    19 Mar 2020 14:06:01 -0000
@@ -148,7 +148,7 @@ dt_prov_syscall_entry(struct dt_provider
        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
                struct dt_evt *dtev;
 
-               dtev = dt_pcb_ring_get(dp);
+               dtev = dt_pcb_ring_get(dp, 0);
                if (dtev == NULL)
                        continue;
 
@@ -190,7 +190,7 @@ dt_prov_syscall_return(struct dt_provide
        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
                struct dt_evt *dtev;
 
-               dtev = dt_pcb_ring_get(dp);
+               dtev = dt_pcb_ring_get(dp, 0);
                if (dtev == NULL)
                        continue;
 
Index: dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
retrieving revision 1.1
diff -u -p -r1.1 dtvar.h
--- dev/dt/dtvar.h      21 Jan 2020 16:16:23 -0000      1.1
+++ dev/dt/dtvar.h      19 Mar 2020 14:06:01 -0000
@@ -194,7 +194,7 @@ void                 dt_pcb_free(struct dt_pcb *);
 void            dt_pcb_purge(struct dt_pcb_list *);
 int             dt_pcb_filter(struct dt_pcb *);
 
-struct dt_evt  *dt_pcb_ring_get(struct dt_pcb *);
+struct dt_evt  *dt_pcb_ring_get(struct dt_pcb *, int);
 void            dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *);
 
 /*
Index: sys/stacktrace.h
===================================================================
RCS file: /cvs/src/sys/sys/stacktrace.h,v
retrieving revision 1.1
diff -u -p -r1.1 stacktrace.h
--- sys/stacktrace.h    20 Jan 2020 15:58:23 -0000      1.1
+++ sys/stacktrace.h    19 Mar 2020 14:06:01 -0000
@@ -28,6 +28,7 @@ struct stacktrace {
 
 #ifdef _KERNEL
 void   stacktrace_print(struct stacktrace *, int (*)(const char *, ...));
+void   stacktrace_save_at(struct stacktrace *, void *);
 void   stacktrace_save(struct stacktrace *);
 #endif
 
Index: arch/amd64/amd64/db_trace.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/db_trace.c,v
retrieving revision 1.49
diff -u -p -r1.49 db_trace.c
--- arch/amd64/amd64/db_trace.c 20 Jan 2020 15:58:23 -0000      1.49
+++ arch/amd64/amd64/db_trace.c 19 Mar 2020 14:06:01 -0000
@@ -256,11 +256,13 @@ db_stack_trace_print(db_expr_t addr, int
 }
 
 void
-stacktrace_save(struct stacktrace *st)
+stacktrace_save_at(struct stacktrace *st, void *xframe)
 {
-       struct callframe *frame, *lastframe;
+       struct callframe *lastframe, *frame = xframe;
+
+       if (frame == NULL)
+               frame = __builtin_frame_address(0);
 
-       frame = __builtin_frame_address(0);
        st->st_count = 0;
        while (st->st_count < STACKTRACE_MAX) {
                st->st_pc[st->st_count++] = frame->f_retaddr;
@@ -275,6 +277,12 @@ stacktrace_save(struct stacktrace *st)
                if (!INKERNEL(frame->f_retaddr))
                        break;
        }
+}
+
+void
+stacktrace_save(struct stacktrace *st)
+{
+       return stacktrace_save_at(st, NULL);
 }
 
 vaddr_t
Index: arch/sparc64/sparc64/db_trace.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_trace.c,v
retrieving revision 1.20
diff -u -p -r1.20 db_trace.c
--- arch/sparc64/sparc64/db_trace.c     20 Jan 2020 15:58:23 -0000      1.20
+++ arch/sparc64/sparc64/db_trace.c     19 Mar 2020 14:06:01 -0000
@@ -155,7 +155,7 @@ db_stack_trace_print(db_expr_t addr, int
 }
 
 void
-stacktrace_save(struct stacktrace *st)
+stacktrace_save_at(struct stacktrace *st, void *xframe)
 {
        struct frame64  *f64;
        vaddr_t         pc;
@@ -163,7 +163,10 @@ stacktrace_save(struct stacktrace *st)
 
        write_all_windows();
 
-       frame = (vaddr_t)__builtin_frame_address(0) - BIAS;
+       if (xframe == NULL)
+               xframe = __builtin_frame_address(0);
+
+       frame = (vaddr_t)xframe - BIAS;
        if ((frame & 1) == 0)
                return;
 
@@ -185,6 +188,11 @@ stacktrace_save(struct stacktrace *st)
        }
 }
 
+void
+stacktrace_save(struct stacktrace *st)
+{
+       return stacktrace_save_at(st, NULL);
+}
 void
 db_dump_window(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
 {

Reply via email to