Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-26 Thread David Sharp
On Wed, Sep 26, 2012 at 11:30 AM, Steven Rostedt  wrote:
> On Tue, 2012-09-25 at 15:29 -0700, David Sharp wrote:
>
>>  static int
>> -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
>> - unsigned long rel_usecs)
>> +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
>>  {
>> - return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
>> - rel_usecs > preempt_mark_thresh ? '!' :
>> -   rel_usecs > 1 ? '+' : ' ');
>> + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
>> + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
>> + unsigned long long abs_ts = iter->ts - iter->tr->time_start;
>> + unsigned long long rel_ts = next_ts - iter->ts;
>> + struct trace_seq *s = &iter->seq;
>> + unsigned long mark_thresh;
>> + int ret;
>> +
>> + if (in_ns) {
>> + abs_ts = ns2usecs(abs_ts);
>> + rel_ts = ns2usecs(rel_ts);
>> + mark_thresh = preempt_mark_thresh_us;
>> + } else
>> + mark_thresh = preempt_mark_thresh_cycles;
>> +
>> + if (verbose && in_ns) {
>> + unsigned long abs_msec = abs_ts;
>> + unsigned long abs_usec = do_div(abs_msec, USEC_PER_MSEC);
>> + unsigned long rel_msec = rel_ts;
>> + unsigned long rel_usec = do_div(rel_msec, USEC_PER_MSEC);
>> +
>> + ret = trace_seq_printf(
>> + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
>> + ns2usecs(iter->ts),
>> + abs_msec, abs_usec,
>> + rel_msec, rel_usec);
>> + } else if (verbose && !in_ns) {
>> + ret = trace_seq_printf(
>> + s, "[%016llx] %lld (+%lld): ",
>> + iter->ts, abs_ts, rel_ts);
>> + } else { /* !verbose */
>> + ret = trace_seq_printf(
>> + s, " %4lld%s%c: ",
>> + abs_ts,
>> + in_ns ? "us" : "",
>> + rel_ts > mark_thresh ? '!' :
>> +   rel_ts > 1 ? '+' : ' ');
>
> I still think the annotations are meaningless for counters. Even if the
> counter is a timer like the tsc, as it does not coincide with real time
> (us), I say just don't print it for counters.

Sounds fine to me, as that's the easiest modification anyway. Done.

>
> This is not helpful:
>
> 

Agreed. The other way to fix it would have been to change the "+"
threshold from 1 to ~2000.

>
>
>> + }
>> + return ret;
>>  }
>>
>>  int trace_print_context(struct trace_iterator *iter)
>>  {
>>   struct trace_seq *s = &iter->seq;
>>   struct trace_entry *entry = iter->ent;
>> - unsigned long long t = ns2usecs(iter->ts);
>> - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
>> - unsigned long secs = (unsigned long)t;
>> + unsigned long long t;
>> + unsigned long secs, usec_rem;
>>   char comm[TASK_COMM_LEN];
>>   int ret;
>>
>> @@ -644,8 +680,13 @@ int trace_print_context(struct trace_iterator *iter)
>>   return 0;
>>   }
>>
>> - return trace_seq_printf(s, " %5lu.%06lu: ",
>> - secs, usec_rem);
>> + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
>> + t = ns2usecs(iter->ts);
>> + usec_rem = do_div(t, USEC_PER_SEC);
>> + secs = (unsigned long)t;
>> + return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
>
> You took away the space before %5.
>
>> + } else
>> + return trace_seq_printf(s, "%12llu: ", iter->ts);
>
> here too.

Don't know why I would have changed that... Fixed.

>>  }
>>
>>  int trace_print_lat_context(struct trace_iterator *iter)
>> @@ -659,36 +700,29 @@ int trace_print_lat_context(struct trace_iterator 
>> *iter)
>>  *next_entry = trace_find_next_entry(iter, NULL,
>>  &next_ts);
>>   unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
>> - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
>> - unsigned long rel_usecs;
>>
>>   /* Restore the original ent_size */
>>   iter->ent_size = ent_size;
>>
>>   if (!next_entry)
>>   next_ts = iter->ts;
>> - rel_usecs = ns2usecs(next_ts - iter->ts);
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-26 Thread Steven Rostedt
On Tue, 2012-09-25 at 15:29 -0700, David Sharp wrote:

>  static int
> -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
> - unsigned long rel_usecs)
> +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
>  {
> - return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
> - rel_usecs > preempt_mark_thresh ? '!' :
> -   rel_usecs > 1 ? '+' : ' ');
> + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
> + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
> + unsigned long long abs_ts = iter->ts - iter->tr->time_start;
> + unsigned long long rel_ts = next_ts - iter->ts;
> + struct trace_seq *s = &iter->seq;
> + unsigned long mark_thresh;
> + int ret;
> +
> + if (in_ns) {
> + abs_ts = ns2usecs(abs_ts);
> + rel_ts = ns2usecs(rel_ts);
> + mark_thresh = preempt_mark_thresh_us;
> + } else
> + mark_thresh = preempt_mark_thresh_cycles;
> +
> + if (verbose && in_ns) {
> + unsigned long abs_msec = abs_ts;
> + unsigned long abs_usec = do_div(abs_msec, USEC_PER_MSEC);
> + unsigned long rel_msec = rel_ts;
> + unsigned long rel_usec = do_div(rel_msec, USEC_PER_MSEC);
> +
> + ret = trace_seq_printf(
> + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
> + ns2usecs(iter->ts),
> + abs_msec, abs_usec,
> + rel_msec, rel_usec);
> + } else if (verbose && !in_ns) {
> + ret = trace_seq_printf(
> + s, "[%016llx] %lld (+%lld): ",
> + iter->ts, abs_ts, rel_ts);
> + } else { /* !verbose */
> + ret = trace_seq_printf(
> + s, " %4lld%s%c: ",
> + abs_ts,
> + in_ns ? "us" : "",
> + rel_ts > mark_thresh ? '!' :
> +   rel_ts > 1 ? '+' : ' ');

I still think the annotations are meaningless for counters. Even if the
counter is a timer like the tsc, as it does not coincide with real time
(us), I say just don't print it for counters.

This is not helpful:

  -0   0d..1  168+: trace_hardirqs_off_thunk <-apic_timer_interrupt
  -0   0d..1  672+: smp_apic_timer_interrupt <-apic_timer_interrupt
  -0   0d..1 1224+: irq_enter <-smp_apic_timer_interrupt
  -0   0d..1 1720+: rcu_irq_enter <-irq_enter
  -0   0d..1 2328+: rcu_idle_exit_common.isra.38 <-rcu_irq_enter
  -0   0d..1 3016+: local_bh_disable <-irq_enter
  -0   0d..1 3512+: __local_bh_disable <-irq_enter
  -0   0d.s1 4120+: tick_check_idle <-irq_enter
  -0   0d.s1 4632+: tick_check_oneshot_broadcast <-tick_check_idle
  -0   0d.s1 5296+: ktime_get <-tick_check_idle
  -0   0d.s1 6104+: tick_nohz_stop_idle <-tick_check_idle
  -0   0d.s1 6616+: update_ts_time_stats <-tick_nohz_stop_idle
  -0   0d.s1 7144+: nr_iowait_cpu <-update_ts_time_stats
  -0   0d.s1 7720+: ktime_get <-sched_clock_tick
  -0   0d.s1 8480+: touch_softlockup_watchdog <-tick_check_idle
  -0   0d.s1 9120+: tick_do_update_jiffies64 <-tick_check_idle
  -0   0d.s1 9648+: _raw_spin_lock <-tick_do_update_jiffies64


> + }
> + return ret;
>  }
>  
>  int trace_print_context(struct trace_iterator *iter)
>  {
>   struct trace_seq *s = &iter->seq;
>   struct trace_entry *entry = iter->ent;
> - unsigned long long t = ns2usecs(iter->ts);
> - unsigned long usec_rem = do_div(t, USEC_PER_SEC);
> - unsigned long secs = (unsigned long)t;
> + unsigned long long t;
> + unsigned long secs, usec_rem;
>   char comm[TASK_COMM_LEN];
>   int ret;
>  
> @@ -644,8 +680,13 @@ int trace_print_context(struct trace_iterator *iter)
>   return 0;
>   }
>  
> - return trace_seq_printf(s, " %5lu.%06lu: ",
> - secs, usec_rem);
> + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
> + t = ns2usecs(iter->ts);
> + usec_rem = do_div(t, USEC_PER_SEC);
> + secs = (unsigned long)t;
> + return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);

You took away the space before %5.

> + } else
> + return trace_seq_printf(s, "%12llu: ", iter->ts);

here too.

We end up with this:

  -0 [002] d.h11968173392984: _raw_spin_lock_irqsave 
<-try_to_wake_up
  -0 [002] d.h11968173393704: add_preempt_count 
<-_raw_spin_lock_irqsave
  -0 [002] d.h21968173394568: task_waking_fair 
<-try_to_wake_up
  -0 [002] d.h21968173395488: select_task_rq_fair 
<-select_task_rq
  -0 [002] d.h21968173396240: __rcu_read_lock 
<-select_task_rq_fair
  -0 [002] d.h21968173397136: idle_cpu <-select_task_rq_fair
  -0 

Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread David Sharp
On Tue, Sep 25, 2012 at 4:36 PM, Steven Rostedt  wrote:
> On Tue, 2012-09-25 at 15:29 -0700, David Sharp wrote:
>
>
>> >> + ret = trace_seq_printf(
>> >> + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
>> >> + ns2usecs(iter->ts),
>> >> + abs_msec, abs_usec,
>> >> + rel_msec, rel_usec);
>> >> + } else if (verbose && !in_ns) {
>> >> + ret = trace_seq_printf(
>> >> + s, "[%016llx] %lld (+%lld): ",
>> >> + iter->ts, abs_ts, rel_ts);
>> >> + } else { /* !verbose */
>> >> + ret = trace_seq_printf(
>> >> + s, " %4lld%s%c: ",
>> >> + abs_ts,
>> >> + in_ns ? "us" : "",
>> >> + rel_ts > mark_thresh ? '!' :
>> >> +   rel_ts > 1 ? '+' : ' ');
>>
>> I just noticed something about this: with x86-tsc clock, this will
>> always print a '+'. Does it matter? Also, is the 200k cycle threshold
>> for '!' okay? I guess the counter clock will always end up with rel_ts
>> == 1, so marks should never appear.
>>
>
> Actually, I'm thinking that counters should not add those annotations.
> As it just doesn't make sense.

Right. But they won't appear anyway, since the delta will always be 1.

wait, by "counters" are you including TSC? Surely that makes sense,
since it is a measurement of time.

Eh... sorry I brought it up. I don't really want to change it. I never
use the latency tracer, so I mostly just don't want to break it.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread Steven Rostedt
On Tue, 2012-09-25 at 15:29 -0700, David Sharp wrote:


> >> + ret = trace_seq_printf(
> >> + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
> >> + ns2usecs(iter->ts),
> >> + abs_msec, abs_usec,
> >> + rel_msec, rel_usec);
> >> + } else if (verbose && !in_ns) {
> >> + ret = trace_seq_printf(
> >> + s, "[%016llx] %lld (+%lld): ",
> >> + iter->ts, abs_ts, rel_ts);
> >> + } else { /* !verbose */
> >> + ret = trace_seq_printf(
> >> + s, " %4lld%s%c: ",
> >> + abs_ts,
> >> + in_ns ? "us" : "",
> >> + rel_ts > mark_thresh ? '!' :
> >> +   rel_ts > 1 ? '+' : ' ');
> 
> I just noticed something about this: with x86-tsc clock, this will
> always print a '+'. Does it matter? Also, is the 200k cycle threshold
> for '!' okay? I guess the counter clock will always end up with rel_ts
> == 1, so marks should never appear.
> 

Actually, I'm thinking that counters should not add those annotations.
As it just doesn't make sense.

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread David Sharp
On Tue, Sep 25, 2012 at 2:42 PM, Steven Rostedt  wrote:
> Sorry, I should have been more picky before. I haven't totally tested
> this yet.
>
> On Tue, 2012-09-25 at 13:49 -0700, David Sharp wrote:
>> With the addition of the "tsc" clock, formatting timestamps to look like
>> fractional seconds is misleading. Mark clocks as either in nanoseconds or
>> not, and format non-nanosecond timestamps as decimal integers.
>>
>> Tested:
>> $ cd /sys/kernel/debug/tracing/
>> $ cat trace_clock
>> [local] global tsc
>> $ echo sched_switch > set_event
>> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
>> $ cat trace
>>   -0 [000]  6330.52: sched_switch: prev_comm=swapper 
>> prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 
>> next_prio=120
>>sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash 
>> prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
>> next_prio=120
>>   ...
>> $ echo 1 > options/latency-format
>> $ cat trace
>>   -0   0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 
>> prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
>>sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash 
>> prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
>> next_prio=120
>>   ...
>> $ echo tsc > trace_clock
>> $ cat trace
>> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
>> $ echo 0 > options/latency-format
>> $ cat trace
>>   -0 [000] 16490053398357: sched_switch: prev_comm=swapper 
>> prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 
>> next_prio=120
>>sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash 
>> prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
>> next_prio=120
>>   ...
>> echo 1 > options/latency-format
>> $ cat trace
>>   -0   0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 
>> prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
>>sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 
>> prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>>   ...
>>
>> v2:
>> Move arch-specific bits out of generic code.
>> v4:
>> Fix x86_32 build due to 64-bit division.
>>
>> Google-Bug-Id: 6980623
>> Signed-off-by: David Sharp 
>> Cc: Steven Rostedt 
>> Cc: Masami Hiramatsu 
>> ---
>>  arch/x86/include/asm/trace_clock.h |2 +-
>>  include/linux/ftrace_event.h   |6 +++
>>  kernel/trace/trace.c   |   15 +-
>>  kernel/trace/trace.h   |4 --
>>  kernel/trace/trace_output.c|   84 
>> +---
>>  5 files changed, 78 insertions(+), 33 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/trace_clock.h 
>> b/arch/x86/include/asm/trace_clock.h
>> index 7ee0d8c..45e17f5 100644
>> --- a/arch/x86/include/asm/trace_clock.h
>> +++ b/arch/x86/include/asm/trace_clock.h
>> @@ -9,7 +9,7 @@
>>  extern u64 notrace trace_clock_x86_tsc(void);
>>
>>  # define ARCH_TRACE_CLOCKS \
>> - { trace_clock_x86_tsc,  "x86-tsc" },
>> + { trace_clock_x86_tsc,  "x86-tsc",  .in_ns = 0 },
>>
>>  #endif
>>
>> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
>> index 642928c..c760670 100644
>> --- a/include/linux/ftrace_event.h
>> +++ b/include/linux/ftrace_event.h
>> @@ -86,6 +86,12 @@ struct trace_iterator {
>>   cpumask_var_t   started;
>>  };
>>
>> +enum trace_iter_flags {
>> + TRACE_FILE_LAT_FMT  = 1,
>> + TRACE_FILE_ANNOTATE = 2,
>> + TRACE_FILE_TIME_IN_NS   = 4,
>> +};
>> +
>>
>>  struct trace_event;
>>
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index 4e26df3..3fe4c5b 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -476,10 +476,11 @@ static const char *trace_options[] = {
>>  static struct {
>>   u64 (*func)(void);
>>   const char *name;
>> + int in_ns; /* is this clock in nanoseconds? */
>
> Add a few tabs between the ns; and /*

Done.

>
>
>>  } trace_clocks[] = {
>> - { trace_clock_local,"local" },
>> - { trace_clock_global,   "global" },
>> - { trace_clock_counter,  "counter" },
>> + { trace_clock_local,"local",1 },
>> + { trace_clock_global,   "global",   1 },
>> + { trace_clock_counter,  "counter",  0 },
>>   ARCH_TRACE_CLOCKS
>>  };
>>
>> @@ -2425,6 +2426,10 @@ __tracing_open(struct inode *inode, struct file *file)
>>   if (ring_buffer_overruns(iter->tr->buffer))
>>   iter->iter_flags |= TRACE_FILE_ANNOTATE;
>>
>> + /* Output in nanoseconds only if we are using a clock in nanoseconds. 
>> */
>> + if (trace_clocks[trace_clock_id].in_ns)
>> + iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
>> +
>>   /* stop the trace while dumping */
>>   tracing_stop();
>>
>> @@ -3324,6 +3329,10 @@ static int tracing_ope

Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock without a decimal point.

2012-09-25 Thread Steven Rostedt
Sorry, I should have been more picky before. I haven't totally tested
this yet.

On Tue, 2012-09-25 at 13:49 -0700, David Sharp wrote:
> With the addition of the "tsc" clock, formatting timestamps to look like
> fractional seconds is misleading. Mark clocks as either in nanoseconds or
> not, and format non-nanosecond timestamps as decimal integers.
> 
> Tested:
> $ cd /sys/kernel/debug/tracing/
> $ cat trace_clock
> [local] global tsc
> $ echo sched_switch > set_event
> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
> $ cat trace
>   -0 [000]  6330.52: sched_switch: prev_comm=swapper 
> prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 
> next_prio=120
>sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash 
> prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
> next_prio=120
>   ...
> $ echo 1 > options/latency-format
> $ cat trace
>   -0   0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 
> prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
>sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 
> prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> $ echo tsc > trace_clock
> $ cat trace
> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
> $ echo 0 > options/latency-format
> $ cat trace
>   -0 [000] 16490053398357: sched_switch: prev_comm=swapper 
> prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 
> next_prio=120
>sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash 
> prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 
> next_prio=120
>   ...
> echo 1 > options/latency-format
> $ cat trace
>   -0   0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 
> prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
>sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 
> prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> 
> v2:
> Move arch-specific bits out of generic code.
> v4:
> Fix x86_32 build due to 64-bit division.
> 
> Google-Bug-Id: 6980623
> Signed-off-by: David Sharp 
> Cc: Steven Rostedt 
> Cc: Masami Hiramatsu 
> ---
>  arch/x86/include/asm/trace_clock.h |2 +-
>  include/linux/ftrace_event.h   |6 +++
>  kernel/trace/trace.c   |   15 +-
>  kernel/trace/trace.h   |4 --
>  kernel/trace/trace_output.c|   84 
> +---
>  5 files changed, 78 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/trace_clock.h 
> b/arch/x86/include/asm/trace_clock.h
> index 7ee0d8c..45e17f5 100644
> --- a/arch/x86/include/asm/trace_clock.h
> +++ b/arch/x86/include/asm/trace_clock.h
> @@ -9,7 +9,7 @@
>  extern u64 notrace trace_clock_x86_tsc(void);
>  
>  # define ARCH_TRACE_CLOCKS \
> - { trace_clock_x86_tsc,  "x86-tsc" },
> + { trace_clock_x86_tsc,  "x86-tsc",  .in_ns = 0 },
>  
>  #endif
>  
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 642928c..c760670 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -86,6 +86,12 @@ struct trace_iterator {
>   cpumask_var_t   started;
>  };
>  
> +enum trace_iter_flags {
> + TRACE_FILE_LAT_FMT  = 1,
> + TRACE_FILE_ANNOTATE = 2,
> + TRACE_FILE_TIME_IN_NS   = 4,
> +};
> +
>  
>  struct trace_event;
>  
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 4e26df3..3fe4c5b 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -476,10 +476,11 @@ static const char *trace_options[] = {
>  static struct {
>   u64 (*func)(void);
>   const char *name;
> + int in_ns; /* is this clock in nanoseconds? */

Add a few tabs between the ns; and /*


>  } trace_clocks[] = {
> - { trace_clock_local,"local" },
> - { trace_clock_global,   "global" },
> - { trace_clock_counter,  "counter" },
> + { trace_clock_local,"local",1 },
> + { trace_clock_global,   "global",   1 },
> + { trace_clock_counter,  "counter",  0 },
>   ARCH_TRACE_CLOCKS
>  };
>  
> @@ -2425,6 +2426,10 @@ __tracing_open(struct inode *inode, struct file *file)
>   if (ring_buffer_overruns(iter->tr->buffer))
>   iter->iter_flags |= TRACE_FILE_ANNOTATE;
>  
> + /* Output in nanoseconds only if we are using a clock in nanoseconds. */
> + if (trace_clocks[trace_clock_id].in_ns)
> + iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
> +
>   /* stop the trace while dumping */
>   tracing_stop();
>  
> @@ -3324,6 +3329,10 @@ static int tracing_open_pipe(struct inode *inode, 
> struct file *filp)
>   if (trace_flags & TRACE_ITER_LATENCY_FMT)
>   iter->iter_flags |= TRACE_FILE_LAT_FMT;
>  
> + /* Output in nanose