On Tue, Feb 23, 2016 at 06:47:41PM +0100, Peter Zijlstra wrote:
> On Tue, Feb 23, 2016 at 10:05:50PM +0530, Pratyush Anand wrote:
> > It's better with this patch; the count is still 1 more in the case of
> > higher probe hits (like 65535 times).
> 
> Ah, ok, I'll go try again.

OK, so the below seems to cure this for me, but now I'm hurting my head
trying to make the same hold for perf_install_in_context(), because 'tricky' :/

(the below is a fold of two patches; I'll send out a new series once I get
my head around the install_in_context muck :/)
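
FWIW, the idea in one paragraph: EVENT_TIME becomes a third is_active
bit next to EVENT_PINNED/EVENT_FLEXIBLE. ctx_sched_out() clears it
when asked for explicitly, or when the last PINNED/FLEXIBLE events go
away; ctx_sched_in() always sets it; and both XOR the old mask against
the new one, so the ctx clock is only stopped/started when that bit
actually flips. A standalone toy of just the mask logic (not kernel
code, obviously):

	#include <stdio.h>

	enum event_type_t {
		EVENT_FLEXIBLE	= 0x1,
		EVENT_PINNED	= 0x2,
		EVENT_TIME	= 0x4,
		EVENT_ALL	= EVENT_FLEXIBLE | EVENT_PINNED | EVENT_TIME,
	};

	static int is_active; /* stands in for ctx->is_active */

	static void sched_out(int event_type)
	{
		int old = is_active;

		is_active &= ~event_type;
		/* EVENT_TIME lingers while any events stay scheduled */
		if (!(is_active & (EVENT_FLEXIBLE | EVENT_PINNED)))
			is_active = 0;

		if ((old ^ is_active) & EVENT_TIME) /* changed bits */
			printf("out %#x: stop ctx time\n", event_type);
	}

	static void sched_in(int event_type)
	{
		int old = is_active;

		is_active |= (event_type | EVENT_TIME);

		if ((old ^ is_active) & EVENT_TIME)
			printf("in  %#x: start ctx time\n", event_type);
	}

	int main(void)
	{
		sched_in(EVENT_ALL);       /* starts the clock */
		sched_out(EVENT_FLEXIBLE); /* rotation; clock keeps running */
		sched_in(EVENT_FLEXIBLE);  /* no restamp here either */
		sched_out(EVENT_ALL);      /* last one out stops the clock */
		return 0;
	}

IOW, partial reschedules (the flexible rotation) no longer touch the
clock at all; only a genuine stop/start does.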

---
 kernel/events/core.c | 84 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 31 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 94c47e3f9a0a..8326a7f5729c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -314,7 +314,8 @@ static void event_function_call(struct perf_event *event, event_f func, void *data)
 enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED = 0x2,
-       EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+       EVENT_TIME = 0x4,
+       EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED | EVENT_TIME,
 };
 
 /*
@@ -1288,16 +1289,18 @@ static u64 perf_event_time(struct perf_event *event)
 
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
- * The caller of this function needs to hold the ctx->lock.
  */
 static void update_event_times(struct perf_event *event)
 {
        struct perf_event_context *ctx = event->ctx;
        u64 run_end;
 
+       lockdep_assert_held(&ctx->lock);
+
        if (event->state < PERF_EVENT_STATE_INACTIVE ||
            event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
                return;
+
        /*
         * in cgroup mode, time_enabled represents
         * the time the event was enabled AND active
@@ -2063,14 +2066,27 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-                              struct perf_event_context *ctx);
+static void ctx_sched_out(struct perf_event_context *ctx,
+                         struct perf_cpu_context *cpuctx,
+                         enum event_type_t event_type);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
             enum event_type_t event_type,
             struct task_struct *task);
 
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx)
+{
+       if (!cpuctx->task_ctx)
+               return;
+
+       if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+               return;
+
+       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+}
+
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                                struct perf_event_context *ctx,
                                struct task_struct *task)
@@ -2219,17 +2235,17 @@ static void __perf_event_enable(struct perf_event *event,
            event->state <= PERF_EVENT_STATE_ERROR)
                return;
 
-       update_context_time(ctx);
+       ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+
        __perf_event_mark_enabled(event);
 
        if (!ctx->is_active)
                return;
 
        if (!event_filter_match(event)) {
-               if (is_cgroup_event(event)) {
-                       perf_cgroup_set_timestamp(current, ctx); // XXX ?
+               if (is_cgroup_event(event))
                        perf_cgroup_defer_enabled(event);
-               }
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
                return;
        }
 
@@ -2237,8 +2253,10 @@ static void __perf_event_enable(struct perf_event *event,
         * If the event is in a group and isn't the group leader,
         * then don't put it on unless the group is on.
         */
-       if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+       if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
                return;
+       }
 
        task_ctx = cpuctx->task_ctx;
        if (ctx->task)
@@ -2344,24 +2362,33 @@ static void ctx_sched_out(struct perf_event_context *ctx,
        }
 
        ctx->is_active &= ~event_type;
+       if (!(ctx->is_active & (EVENT_FLEXIBLE | EVENT_PINNED)))
+               ctx->is_active = 0;
+
        if (ctx->task) {
                WARN_ON_ONCE(cpuctx->task_ctx != ctx);
                if (!ctx->is_active)
                        cpuctx->task_ctx = NULL;
        }
 
-       update_context_time(ctx);
-       update_cgrp_time_from_cpuctx(cpuctx);
+       is_active ^= ctx->is_active; /* changed bits */
+
+       if (is_active & EVENT_TIME) {
+               /* update (and stop) ctx time */
+               update_context_time(ctx);
+               update_cgrp_time_from_cpuctx(cpuctx);
+       }
+
        if (!ctx->nr_active)
                return;
 
        perf_pmu_disable(ctx->pmu);
-       if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
+       if (is_active & EVENT_PINNED) {
                list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
 
-       if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
+       if (is_active & EVENT_FLEXIBLE) {
                list_for_each_entry(event, &ctx->flexible_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
@@ -2641,18 +2668,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-                              struct perf_event_context *ctx)
-{
-       if (!cpuctx->task_ctx)
-               return;
-
-       if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
-               return;
-
-       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-}
-
 /*
  * Called with IRQs disabled
  */
@@ -2735,7 +2750,7 @@ ctx_sched_in(struct perf_event_context *ctx,
        if (likely(!ctx->nr_events))
                return;
 
-       ctx->is_active |= event_type;
+       ctx->is_active |= (event_type | EVENT_TIME);
        if (ctx->task) {
                if (!is_active)
                        cpuctx->task_ctx = ctx;
@@ -2743,18 +2758,24 @@ ctx_sched_in(struct perf_event_context *ctx,
                        WARN_ON_ONCE(cpuctx->task_ctx != ctx);
        }
 
-       now = perf_clock();
-       ctx->timestamp = now;
-       perf_cgroup_set_timestamp(task, ctx);
+       is_active ^= ctx->is_active; /* changed bits */
+
+       if (is_active & EVENT_TIME) {
+               /* start ctx time */
+               now = perf_clock();
+               ctx->timestamp = now;
+               perf_cgroup_set_timestamp(task, ctx);
+       }
+
        /*
         * First go through the list and put on any pinned groups
         * in order to give them the best chance of going on.
         */
-       if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
+       if (is_active & EVENT_PINNED)
                ctx_pinned_sched_in(ctx, cpuctx);
 
        /* Then walk through the lower prio flexible groups */
-       if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
+       if (is_active & EVENT_FLEXIBLE)
                ctx_flexible_sched_in(ctx, cpuctx);
 }
 
@@ -3120,6 +3141,7 @@ static void perf_event_enable_on_exec(int ctxn)
 
        cpuctx = __get_cpu_context(ctx);
        perf_ctx_lock(cpuctx, ctx);
+       ctx_sched_out(ctx, cpuctx, EVENT_TIME);
        list_for_each_entry(event, &ctx->event_list, event_entry)
                enabled |= event_enable_on_exec(event, ctx);
 

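For completeness, why __perf_event_enable() and
perf_event_enable_on_exec() above now bracket the state change with an
EVENT_TIME sched_out/sched_in pair: fold and freeze the clock once,
stamp the event against the frozen time, then restart the clock. A toy
model of that ordering (again not kernel code; variables and numbers
are all invented):

	#include <stdio.h>

	static unsigned long now = 100;	/* pretend perf_clock() */
	static unsigned long ctx_time;	/* pretend ctx->time */
	static unsigned long ctx_stamp;	/* pretend ctx->timestamp */

	/* fold the elapsed wall time into ctx_time */
	static void update_context_time(void)
	{
		ctx_time += now - ctx_stamp;
		ctx_stamp = now;
	}

	int main(void)
	{
		ctx_stamp = now;	/* ctx went active at 100 */

		now = 110;		/* the event gets enabled at 110 */

		/* ctx_sched_out(ctx, cpuctx, EVENT_TIME): update + stop */
		update_context_time();

		/* __perf_event_mark_enabled(): stamp against frozen time */
		unsigned long tstamp_enabled = ctx_time;

		/* ctx_sched_in(ctx, cpuctx, EVENT_TIME, current): restart */
		ctx_stamp = now;

		printf("tstamp_enabled = %lu\n", tstamp_enabled); /* 10 */
		return 0;
	}

Everything that looks at the event's times between the out and the in
sees one consistent frozen clock instead of racing a moving one.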