Re: [PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread David Sharp
On Mon, Sep 24, 2012 at 8:27 PM, Steven Rostedt  wrote:
> On Thu, 2012-09-20 at 15:52 -0700, David Sharp wrote:
>> With the addition of the "tsc" clock, formatting timestamps to look like
>> fractional seconds is misleading. Mark clocks as either in nanoseconds or
>> not, and format non-nanosecond timestamps as decimal integers.
>
> I got this:
>
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__umoddi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__udivdi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__umoddi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__umoddi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__udivdi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__umoddi3'
> /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
> undefined reference to `__udivdi3'
>
> when building for x86_32.
>
>> + int ret;
>> + struct trace_seq *s = >seq;
>> + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
>> + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
>> + unsigned long long abs_ts = iter->ts - iter->tr->time_start;
>> + unsigned long long rel_ts = next_ts - iter->ts;
>> + unsigned long mark_thresh;
>> +
>> + if (in_ns) {
>> + abs_ts = ns2usecs(abs_ts);
>> + rel_ts = ns2usecs(rel_ts);
>> + mark_thresh = preempt_mark_thresh_us;
>> + } else
>> + mark_thresh = preempt_mark_thresh_cycles;
>> +
>> + if (verbose && in_ns) {
>> + ret = trace_seq_printf(
>> + s, "[%08llx] %lld.%03lldms (+%lld.%03lldms): ",
>> + ns2usecs(iter->ts),
>> + abs_ts / USEC_PER_MSEC,
>> + abs_ts % USEC_PER_MSEC,
>> + rel_ts / USEC_PER_MSEC,
>
> You can't divide 64 bit numbers in the kernel. It breaks on 32bit archs.

I didn't realize that. Sorry. Not sure why I changed them to long long
in the first place.

Although, that does mean that it is currently overflowing every 4.29
seconds. I may as well fix that.

>
> -- Steve
>
>> + rel_ts % USEC_PER_MSEC);
>> + } else if (verbose && !in_ns) {
>> + ret = trace_seq_printf(
>> + s, "[%016llx] %lld (+%lld): ",
>> + iter->ts, abs_ts, rel_ts);
>> + } else { /* !verbose */
>> + ret = trace_seq_printf(
>> + s, " %4lld%s%c: ",
>> + abs_ts,
>> + in_ns ? "us" : "",
>> + rel_ts > mark_thresh ? '!' :
>> +   rel_ts > 1 ? '+' : ' ');
>> + }
>> + return ret;
>>  }
>>
>>  int trace_print_context(struct trace_iterator *iter)
>>  {
>>   struct trace_seq *s = >seq;
>>   struct trace_entry *entry = iter->ent;
>> - unsigned long long t = ns2usecs(iter->ts);
>> - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
>> - unsigned long secs = (unsigned long)t;
>> + unsigned long long t;
>> + unsigned long secs, usec_rem;
>>   char comm[TASK_COMM_LEN];
>>   int ret;
>>
>> @@ -644,8 +677,13 @@ int trace_print_context(struct trace_iterator *iter)
>>   return 0;
>>   }
>>
>> - return trace_seq_printf(s, " %5lu.%06lu: ",
>> - secs, usec_rem);
>> + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
>> + t = ns2usecs(iter->ts);
>> + usec_rem = do_div(t, USEC_PER_SEC);
>> + secs = (unsigned long)t;
>> + return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
>> + } else
>> + return trace_seq_printf(s, "%12llu: ", iter->ts);
>>  }
>>
>>  int trace_print_lat_context(struct trace_iterator *iter)
>> @@ -659,36 +697,30 @@ int trace_print_lat_context(struct trace_iterator 
>> *iter)
>>  *next_entry = trace_find_next_entry(iter, NULL,
>>  _ts);
>>   unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
>> - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
>> - unsigned long rel_usecs;
>> +
>>
>>   /* Restore the original ent_size */
>>   iter->ent_size = ent_size;
>>
>>   if (!next_entry)
>>   next_ts = iter->ts;
>> - rel_usecs = ns2usecs(next_ts - iter->ts);
>>
>>   if (verbose) {
>>   char comm[TASK_COMM_LEN];
>>
>>   trace_find_cmdline(entry->pid, comm);
>>
>> - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx 

Re: [PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread David Sharp
On Mon, Sep 24, 2012 at 8:27 PM, Steven Rostedt rost...@goodmis.org wrote:
 On Thu, 2012-09-20 at 15:52 -0700, David Sharp wrote:
 With the addition of the tsc clock, formatting timestamps to look like
 fractional seconds is misleading. Mark clocks as either in nanoseconds or
 not, and format non-nanosecond timestamps as decimal integers.

 I got this:

 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__umoddi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__udivdi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__umoddi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__umoddi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__udivdi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__umoddi3'
 /work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
 undefined reference to `__udivdi3'

 when building for x86_32.

 + int ret;
 + struct trace_seq *s = iter-seq;
 + unsigned long verbose = trace_flags  TRACE_ITER_VERBOSE;
 + unsigned long in_ns = iter-iter_flags  TRACE_FILE_TIME_IN_NS;
 + unsigned long long abs_ts = iter-ts - iter-tr-time_start;
 + unsigned long long rel_ts = next_ts - iter-ts;
 + unsigned long mark_thresh;
 +
 + if (in_ns) {
 + abs_ts = ns2usecs(abs_ts);
 + rel_ts = ns2usecs(rel_ts);
 + mark_thresh = preempt_mark_thresh_us;
 + } else
 + mark_thresh = preempt_mark_thresh_cycles;
 +
 + if (verbose  in_ns) {
 + ret = trace_seq_printf(
 + s, [%08llx] %lld.%03lldms (+%lld.%03lldms): ,
 + ns2usecs(iter-ts),
 + abs_ts / USEC_PER_MSEC,
 + abs_ts % USEC_PER_MSEC,
 + rel_ts / USEC_PER_MSEC,

 You can't divide 64 bit numbers in the kernel. It breaks on 32bit archs.

I didn't realize that. Sorry. Not sure why I changed them to long long
in the first place.

Although, that does mean that it is currently overflowing every 4.29
seconds. I may as well fix that.


 -- Steve

 + rel_ts % USEC_PER_MSEC);
 + } else if (verbose  !in_ns) {
 + ret = trace_seq_printf(
 + s, [%016llx] %lld (+%lld): ,
 + iter-ts, abs_ts, rel_ts);
 + } else { /* !verbose */
 + ret = trace_seq_printf(
 + s,  %4lld%s%c: ,
 + abs_ts,
 + in_ns ? us : ,
 + rel_ts  mark_thresh ? '!' :
 +   rel_ts  1 ? '+' : ' ');
 + }
 + return ret;
  }

  int trace_print_context(struct trace_iterator *iter)
  {
   struct trace_seq *s = iter-seq;
   struct trace_entry *entry = iter-ent;
 - unsigned long long t = ns2usecs(iter-ts);
 - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
 - unsigned long secs = (unsigned long)t;
 + unsigned long long t;
 + unsigned long secs, usec_rem;
   char comm[TASK_COMM_LEN];
   int ret;

 @@ -644,8 +677,13 @@ int trace_print_context(struct trace_iterator *iter)
   return 0;
   }

 - return trace_seq_printf(s,  %5lu.%06lu: ,
 - secs, usec_rem);
 + if (iter-iter_flags  TRACE_FILE_TIME_IN_NS) {
 + t = ns2usecs(iter-ts);
 + usec_rem = do_div(t, USEC_PER_SEC);
 + secs = (unsigned long)t;
 + return trace_seq_printf(s, %5lu.%06lu: , secs, usec_rem);
 + } else
 + return trace_seq_printf(s, %12llu: , iter-ts);
  }

  int trace_print_lat_context(struct trace_iterator *iter)
 @@ -659,36 +697,30 @@ int trace_print_lat_context(struct trace_iterator 
 *iter)
  *next_entry = trace_find_next_entry(iter, NULL,
  next_ts);
   unsigned long verbose = (trace_flags  TRACE_ITER_VERBOSE);
 - unsigned long abs_usecs = ns2usecs(iter-ts - iter-tr-time_start);
 - unsigned long rel_usecs;
 +

   /* Restore the original ent_size */
   iter-ent_size = ent_size;

   if (!next_entry)
   next_ts = iter-ts;
 - rel_usecs = ns2usecs(next_ts - iter-ts);

   if (verbose) {
   char comm[TASK_COMM_LEN];

   trace_find_cmdline(entry-pid, comm);

 - ret = trace_seq_printf(s, %16s %5d %3d %d %08x %08lx [%08llx]
 - %ld.%03ldms (+%ld.%03ldms): , comm,
 -entry-pid, iter-cpu, entry-flags,
 -entry-preempt_count, iter-idx,
 - 

Re: [PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-24 Thread Steven Rostedt
On Thu, 2012-09-20 at 15:52 -0700, David Sharp wrote:
> With the addition of the "tsc" clock, formatting timestamps to look like
> fractional seconds is misleading. Mark clocks as either in nanoseconds or
> not, and format non-nanosecond timestamps as decimal integers.

I got this:

/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'

when building for x86_32.

> + int ret;
> + struct trace_seq *s = >seq;
> + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
> + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
> + unsigned long long abs_ts = iter->ts - iter->tr->time_start;
> + unsigned long long rel_ts = next_ts - iter->ts;
> + unsigned long mark_thresh;
> +
> + if (in_ns) {
> + abs_ts = ns2usecs(abs_ts);
> + rel_ts = ns2usecs(rel_ts);
> + mark_thresh = preempt_mark_thresh_us;
> + } else
> + mark_thresh = preempt_mark_thresh_cycles;
> +
> + if (verbose && in_ns) {
> + ret = trace_seq_printf(
> + s, "[%08llx] %lld.%03lldms (+%lld.%03lldms): ",
> + ns2usecs(iter->ts),
> + abs_ts / USEC_PER_MSEC,
> + abs_ts % USEC_PER_MSEC,
> + rel_ts / USEC_PER_MSEC,

You can't divide 64 bit numbers in the kernel. It breaks on 32bit archs.

-- Steve

> + rel_ts % USEC_PER_MSEC);
> + } else if (verbose && !in_ns) {
> + ret = trace_seq_printf(
> + s, "[%016llx] %lld (+%lld): ",
> + iter->ts, abs_ts, rel_ts);
> + } else { /* !verbose */
> + ret = trace_seq_printf(
> + s, " %4lld%s%c: ",
> + abs_ts,
> + in_ns ? "us" : "",
> + rel_ts > mark_thresh ? '!' :
> +   rel_ts > 1 ? '+' : ' ');
> + }
> + return ret;
>  }
>  
>  int trace_print_context(struct trace_iterator *iter)
>  {
>   struct trace_seq *s = >seq;
>   struct trace_entry *entry = iter->ent;
> - unsigned long long t = ns2usecs(iter->ts);
> - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
> - unsigned long secs = (unsigned long)t;
> + unsigned long long t;
> + unsigned long secs, usec_rem;
>   char comm[TASK_COMM_LEN];
>   int ret;
>  
> @@ -644,8 +677,13 @@ int trace_print_context(struct trace_iterator *iter)
>   return 0;
>   }
>  
> - return trace_seq_printf(s, " %5lu.%06lu: ",
> - secs, usec_rem);
> + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
> + t = ns2usecs(iter->ts);
> + usec_rem = do_div(t, USEC_PER_SEC);
> + secs = (unsigned long)t;
> + return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
> + } else
> + return trace_seq_printf(s, "%12llu: ", iter->ts);
>  }
>  
>  int trace_print_lat_context(struct trace_iterator *iter)
> @@ -659,36 +697,30 @@ int trace_print_lat_context(struct trace_iterator *iter)
>  *next_entry = trace_find_next_entry(iter, NULL,
>  _ts);
>   unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
> - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
> - unsigned long rel_usecs;
> +
>  
>   /* Restore the original ent_size */
>   iter->ent_size = ent_size;
>  
>   if (!next_entry)
>   next_ts = iter->ts;
> - rel_usecs = ns2usecs(next_ts - iter->ts);
>  
>   if (verbose) {
>   char comm[TASK_COMM_LEN];
>  
>   trace_find_cmdline(entry->pid, comm);
>  
> - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
> -" %ld.%03ldms (+%ld.%03ldms): ", comm,
> -entry->pid, iter->cpu, entry->flags,
> -entry->preempt_count, iter->idx,
> -ns2usecs(iter->ts),
> -abs_usecs / USEC_PER_MSEC,
> -

Re: [PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-24 Thread Steven Rostedt
On Thu, 2012-09-20 at 15:52 -0700, David Sharp wrote:
 With the addition of the tsc clock, formatting timestamps to look like
 fractional seconds is misleading. Mark clocks as either in nanoseconds or
 not, and format non-nanosecond timestamps as decimal integers.

I got this:

/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__umoddi3'
/work/autotest/nobackup/linux-test.git/kernel/trace/trace_output.c:636: 
undefined reference to `__udivdi3'

when building for x86_32.

 + int ret;
 + struct trace_seq *s = iter-seq;
 + unsigned long verbose = trace_flags  TRACE_ITER_VERBOSE;
 + unsigned long in_ns = iter-iter_flags  TRACE_FILE_TIME_IN_NS;
 + unsigned long long abs_ts = iter-ts - iter-tr-time_start;
 + unsigned long long rel_ts = next_ts - iter-ts;
 + unsigned long mark_thresh;
 +
 + if (in_ns) {
 + abs_ts = ns2usecs(abs_ts);
 + rel_ts = ns2usecs(rel_ts);
 + mark_thresh = preempt_mark_thresh_us;
 + } else
 + mark_thresh = preempt_mark_thresh_cycles;
 +
 + if (verbose  in_ns) {
 + ret = trace_seq_printf(
 + s, [%08llx] %lld.%03lldms (+%lld.%03lldms): ,
 + ns2usecs(iter-ts),
 + abs_ts / USEC_PER_MSEC,
 + abs_ts % USEC_PER_MSEC,
 + rel_ts / USEC_PER_MSEC,

You can't divide 64 bit numbers in the kernel. It breaks on 32bit archs.

-- Steve

 + rel_ts % USEC_PER_MSEC);
 + } else if (verbose  !in_ns) {
 + ret = trace_seq_printf(
 + s, [%016llx] %lld (+%lld): ,
 + iter-ts, abs_ts, rel_ts);
 + } else { /* !verbose */
 + ret = trace_seq_printf(
 + s,  %4lld%s%c: ,
 + abs_ts,
 + in_ns ? us : ,
 + rel_ts  mark_thresh ? '!' :
 +   rel_ts  1 ? '+' : ' ');
 + }
 + return ret;
  }
  
  int trace_print_context(struct trace_iterator *iter)
  {
   struct trace_seq *s = iter-seq;
   struct trace_entry *entry = iter-ent;
 - unsigned long long t = ns2usecs(iter-ts);
 - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
 - unsigned long secs = (unsigned long)t;
 + unsigned long long t;
 + unsigned long secs, usec_rem;
   char comm[TASK_COMM_LEN];
   int ret;
  
 @@ -644,8 +677,13 @@ int trace_print_context(struct trace_iterator *iter)
   return 0;
   }
  
 - return trace_seq_printf(s,  %5lu.%06lu: ,
 - secs, usec_rem);
 + if (iter-iter_flags  TRACE_FILE_TIME_IN_NS) {
 + t = ns2usecs(iter-ts);
 + usec_rem = do_div(t, USEC_PER_SEC);
 + secs = (unsigned long)t;
 + return trace_seq_printf(s, %5lu.%06lu: , secs, usec_rem);
 + } else
 + return trace_seq_printf(s, %12llu: , iter-ts);
  }
  
  int trace_print_lat_context(struct trace_iterator *iter)
 @@ -659,36 +697,30 @@ int trace_print_lat_context(struct trace_iterator *iter)
  *next_entry = trace_find_next_entry(iter, NULL,
  next_ts);
   unsigned long verbose = (trace_flags  TRACE_ITER_VERBOSE);
 - unsigned long abs_usecs = ns2usecs(iter-ts - iter-tr-time_start);
 - unsigned long rel_usecs;
 +
  
   /* Restore the original ent_size */
   iter-ent_size = ent_size;
  
   if (!next_entry)
   next_ts = iter-ts;
 - rel_usecs = ns2usecs(next_ts - iter-ts);
  
   if (verbose) {
   char comm[TASK_COMM_LEN];
  
   trace_find_cmdline(entry-pid, comm);
  
 - ret = trace_seq_printf(s, %16s %5d %3d %d %08x %08lx [%08llx]
 - %ld.%03ldms (+%ld.%03ldms): , comm,
 -entry-pid, iter-cpu, entry-flags,
 -entry-preempt_count, iter-idx,
 -ns2usecs(iter-ts),
 -abs_usecs / USEC_PER_MSEC,
 -abs_usecs % USEC_PER_MSEC,
 -rel_usecs / USEC_PER_MSEC,
 -   

[PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-20 Thread David Sharp
With the addition of the "tsc" clock, formatting timestamps to look like
fractional seconds is misleading. Mark clocks as either in nanoseconds or
not, and format non-nanosecond timestamps as decimal integers.

Tested:
$ cd /sys/kernel/debug/tracing/
$ cat trace_clock
[local] global tsc
$ echo sched_switch > set_event
$ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
$ cat trace
  -0 [000]  6330.52: sched_switch: prev_comm=swapper 
prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 
next_prio=120
   sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash 
prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
next_prio=120
  ...
$ echo 1 > options/latency-format
$ cat trace
  -0   0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 
prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
   sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 
prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo tsc > trace_clock
$ cat trace
$ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
$ echo 0 > options/latency-format
$ cat trace
  -0 [000] 16490053398357: sched_switch: prev_comm=swapper 
prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 
next_prio=120
   sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash 
prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
next_prio=120
  ...
echo 1 > options/latency-format
$ cat trace
  -0   0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 
prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
   sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 
prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...

v2:
Move arch-specific bits out of generic code.

Google-Bug-Id: 6980623
Signed-off-by: David Sharp 
Cc: Steven Rostedt 
Cc: Masami Hiramatsu 
---
 arch/x86/include/asm/trace_clock.h |2 +-
 include/linux/ftrace_event.h   |6 +++
 kernel/trace/trace.c   |   15 +-
 kernel/trace/trace.h   |4 --
 kernel/trace/trace_output.c|   82 +---
 5 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/trace_clock.h 
b/arch/x86/include/asm/trace_clock.h
index 7ee0d8c..45e17f5 100644
--- a/arch/x86/include/asm/trace_clock.h
+++ b/arch/x86/include/asm/trace_clock.h
@@ -9,7 +9,7 @@
 extern u64 notrace trace_clock_x86_tsc(void);
 
 # define ARCH_TRACE_CLOCKS \
-   { trace_clock_x86_tsc,  "x86-tsc" },
+   { trace_clock_x86_tsc,  "x86-tsc",  .in_ns = 0 },
 
 #endif
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 642928c..c760670 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -86,6 +86,12 @@ struct trace_iterator {
cpumask_var_t   started;
 };
 
+enum trace_iter_flags {
+   TRACE_FILE_LAT_FMT  = 1,
+   TRACE_FILE_ANNOTATE = 2,
+   TRACE_FILE_TIME_IN_NS   = 4,
+};
+
 
 struct trace_event;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4e26df3..3fe4c5b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -476,10 +476,11 @@ static const char *trace_options[] = {
 static struct {
u64 (*func)(void);
const char *name;
+   int in_ns; /* is this clock in nanoseconds? */
 } trace_clocks[] = {
-   { trace_clock_local,"local" },
-   { trace_clock_global,   "global" },
-   { trace_clock_counter,  "counter" },
+   { trace_clock_local,"local",1 },
+   { trace_clock_global,   "global",   1 },
+   { trace_clock_counter,  "counter",  0 },
ARCH_TRACE_CLOCKS
 };
 
@@ -2425,6 +2426,10 @@ __tracing_open(struct inode *inode, struct file *file)
if (ring_buffer_overruns(iter->tr->buffer))
iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+   /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+   if (trace_clocks[trace_clock_id].in_ns)
+   iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
/* stop the trace while dumping */
tracing_stop();
 
@@ -3324,6 +3329,10 @@ static int tracing_open_pipe(struct inode *inode, struct 
file *filp)
if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;
 
+   /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+   if (trace_clocks[trace_clock_id].in_ns)
+   iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
iter->cpu_file = cpu_file;
iter->tr = _trace;
mutex_init(>mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55e1f7f..84fefed 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -409,10 +409,6 @@ void 

[PATCH v3 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-20 Thread David Sharp
With the addition of the tsc clock, formatting timestamps to look like
fractional seconds is misleading. Mark clocks as either in nanoseconds or
not, and format non-nanosecond timestamps as decimal integers.

Tested:
$ cd /sys/kernel/debug/tracing/
$ cat trace_clock
[local] global tsc
$ echo sched_switch  set_event
$ echo 1  tracing_enabled ; sleep 0.0005 ; echo 0  tracing_enabled
$ cat trace
  idle-0 [000]  6330.52: sched_switch: prev_comm=swapper 
prev_pid=0 prev_prio=120 prev_state=R == next_comm=bash next_pid=29964 
next_prio=120
   sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash 
prev_pid=29964 prev_prio=120 prev_state=S == next_comm=swapper next_pid=0 
next_prio=120
  ...
$ echo 1  options/latency-format
$ cat trace
  idle-0   0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 
prev_prio=120 prev_state=R == next_comm=bash next_pid=29964 next_prio=120
   sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 
prev_prio=120 prev_state=S == next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo tsc  trace_clock
$ cat trace
$ echo 1  tracing_enabled ; sleep 0.0005 ; echo 0  tracing_enabled
$ echo 0  options/latency-format
$ cat trace
  idle-0 [000] 16490053398357: sched_switch: prev_comm=swapper 
prev_pid=0 prev_prio=120 prev_state=R == next_comm=bash next_pid=31128 
next_prio=120
   sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash 
prev_pid=31128 prev_prio=120 prev_state=S == next_comm=swapper next_pid=0 
next_prio=120
  ...
echo 1  options/latency-format
$ cat trace
  idle-0   0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 
prev_prio=120 prev_state=R == next_comm=bash next_pid=31128 next_prio=120
   sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 
prev_prio=120 prev_state=S == next_comm=swapper next_pid=0 next_prio=120
  ...

v2:
Move arch-specific bits out of generic code.

Google-Bug-Id: 6980623
Signed-off-by: David Sharp dhsh...@google.com
Cc: Steven Rostedt rost...@goodmis.org
Cc: Masami Hiramatsu masami.hiramatsu...@hitachi.com
---
 arch/x86/include/asm/trace_clock.h |2 +-
 include/linux/ftrace_event.h   |6 +++
 kernel/trace/trace.c   |   15 +-
 kernel/trace/trace.h   |4 --
 kernel/trace/trace_output.c|   82 +---
 5 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/trace_clock.h 
b/arch/x86/include/asm/trace_clock.h
index 7ee0d8c..45e17f5 100644
--- a/arch/x86/include/asm/trace_clock.h
+++ b/arch/x86/include/asm/trace_clock.h
@@ -9,7 +9,7 @@
 extern u64 notrace trace_clock_x86_tsc(void);
 
 # define ARCH_TRACE_CLOCKS \
-   { trace_clock_x86_tsc,  x86-tsc },
+   { trace_clock_x86_tsc,  x86-tsc,  .in_ns = 0 },
 
 #endif
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 642928c..c760670 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -86,6 +86,12 @@ struct trace_iterator {
cpumask_var_t   started;
 };
 
+enum trace_iter_flags {
+   TRACE_FILE_LAT_FMT  = 1,
+   TRACE_FILE_ANNOTATE = 2,
+   TRACE_FILE_TIME_IN_NS   = 4,
+};
+
 
 struct trace_event;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4e26df3..3fe4c5b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -476,10 +476,11 @@ static const char *trace_options[] = {
 static struct {
u64 (*func)(void);
const char *name;
+   int in_ns; /* is this clock in nanoseconds? */
 } trace_clocks[] = {
-   { trace_clock_local,local },
-   { trace_clock_global,   global },
-   { trace_clock_counter,  counter },
+   { trace_clock_local,local,1 },
+   { trace_clock_global,   global,   1 },
+   { trace_clock_counter,  counter,  0 },
ARCH_TRACE_CLOCKS
 };
 
@@ -2425,6 +2426,10 @@ __tracing_open(struct inode *inode, struct file *file)
if (ring_buffer_overruns(iter-tr-buffer))
iter-iter_flags |= TRACE_FILE_ANNOTATE;
 
+   /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+   if (trace_clocks[trace_clock_id].in_ns)
+   iter-iter_flags |= TRACE_FILE_TIME_IN_NS;
+
/* stop the trace while dumping */
tracing_stop();
 
@@ -3324,6 +3329,10 @@ static int tracing_open_pipe(struct inode *inode, struct 
file *filp)
if (trace_flags  TRACE_ITER_LATENCY_FMT)
iter-iter_flags |= TRACE_FILE_LAT_FMT;
 
+   /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+   if (trace_clocks[trace_clock_id].in_ns)
+   iter-iter_flags |= TRACE_FILE_TIME_IN_NS;
+
iter-cpu_file = cpu_file;
iter-tr = global_trace;
mutex_init(iter-mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55e1f7f..84fefed 100644
--- a/kernel/trace/trace.h
+++