On Thu, Apr 21, 2016 at 06:17:02PM +0300, Alexander Shishkin wrote:
> +struct addr_filter_setup_data {
> +     struct perf_event       *event;
> +     unsigned long           *offs;
> +     unsigned long           gen;
> +};
> +
> +static int __perf_event_addr_filters_setup(void *info)
> +{
> +     struct addr_filter_setup_data *id = info;
> +     struct perf_event *event = id->event;
> +     struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +     unsigned long flags;
> +
> +     if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
> +             return -EAGAIN;
> +
> +     /* matches smp_wmb() in event_sched_in() */
> +     smp_rmb();
> +
> +     /*
> +      * There is a window with interrupts enabled before we get here,
> +      * so we need to check again lest we try to stop another cpu's event.
> +      */
> +     if (READ_ONCE(event->oncpu) != smp_processor_id())
> +             return -EAGAIN;
> +
> +     raw_spin_lock_irqsave(&ifh->lock, flags);

Since we only ever use this from cpu_function_call(), IRQs are guaranteed
to be off already; you even rely on that in the above ->oncpu test. So the
_irqsave() thing is entirely redundant, no?
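
Something like the below for the tail of the function should do (an
untested sketch on my part; the WARN_ON_ONCE() is only there to document
the invariant):

	/* IPI context via cpu_function_call(): IRQs are already off */
	WARN_ON_ONCE(!irqs_disabled());

	raw_spin_lock(&ifh->lock);
	if (id->gen == event->addr_filters_gen || !id->offs)
		event->pmu->addr_filters_setup(event, id->offs, PERF_EF_RELOAD);
	raw_spin_unlock(&ifh->lock);

	return 0;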

> +     /*
> +      * In case of a generation mismatch, we don't have to do anything for
> +      * this instance anymore; there will be another one with the *right* gen.
> +      * If called to clear filters, always let it through.
> +      */
> +     if (id->gen == event->addr_filters_gen || !id->offs)
> +             event->pmu->addr_filters_setup(event, id->offs, PERF_EF_RELOAD);
> +     raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +     return 0;
> +}
> +
> +static int perf_event_addr_filters_setup(struct perf_event *event,
> +                                        unsigned long *offs,
> +                                        unsigned long gen)
> +{
> +     struct addr_filter_setup_data id = {
> +             .event  = event,
> +             .offs   = offs,
> +             .gen    = gen,
> +     };
> +     struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +     unsigned long flags;
> +     int ret = 0;
> +
> +     /*
> +      * We can't use event_function_call() here, because that would
> +      * require ctx::mutex, but one of our callers is called with
> +      * mm::mmap_sem down, which would cause an inversion, see bullet
> +      * (2) in put_event().
> +      */
> +     do {
> +             if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) {
> +                     raw_spin_lock_irqsave(&ifh->lock, flags);

And here, as at all the other sites, IRQs must be enabled, no?

> +                     /* see __perf_event_addr_filters_setup */

How is this not racy? The event could have gotten enabled between that
test and getting here, right?

> +                     if (gen == event->addr_filters_gen || !offs)
> +                             event->pmu->addr_filters_setup(event, offs, 0);
> +                     raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +                     if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
> +                             break;

I'm confused: calling ->addr_filters_setup() on an active event, from
the wrong CPU, is badness, no?

Is this something the callback has to handle?
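
To spell out the interleaving I'm worried about (my reading of the code;
the event_sched_in() ordering is inferred from the barrier comments
above):

	CPU-A (this function)			CPU-B
	---------------------			-----
	READ_ONCE(event->state) != ACTIVE
						event_sched_in():
						  event->oncpu = B;
						  smp_wmb();
						  event->state = ACTIVE;
	raw_spin_lock_irqsave(&ifh->lock, ...);
	event->pmu->addr_filters_setup(event, offs, 0);

Nothing between the ->state test and the ->addr_filters_setup() call
serializes against the event becoming ACTIVE, so we can end up poking at
a now-running event from the wrong CPU; the recheck at the bottom only
notices this after the fact.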

> +                     /* otherwise, fall through to the cross-call */
> +             }
> +
> +             /* matches smp_wmb() in event_sched_in() */
> +             smp_rmb();
> +
> +             ret = cpu_function_call(READ_ONCE(event->oncpu),
> +                                     __perf_event_addr_filters_setup, &id);
> +     } while (ret == -EAGAIN);
> +
> +     return ret;
> +}

This whole thing seems rather full of tricky bits :/


> @@ -6398,6 +6629,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
>               /* .flags (attr_mmap2 only) */
>       };
>  
> +     perf_addr_filters_adjust(vma);
>       perf_event_mmap_event(&mmap_event);
>  }

And this is the 'offending' site that requires all the trickery..

> +/*
> + * Calculate event's address filters' ranges based on the
> + * task's existing mappings; if any of the existing mappings
> + * match the filters, update event's hw configuration and
> + * restart it if it's running.
> + */
> +static void perf_event_addr_filters_apply(struct perf_event *event)
> +{
> +     struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +     struct perf_addr_filter *filter;
> +     struct task_struct *task = READ_ONCE(event->ctx->task);
> +     struct mm_struct *mm = NULL;
> +     unsigned int restart = 0, count = 0;
> +     unsigned long *offs, flags, gen;
> +
> +     offs = event->hw.addr_filters_offs;
> +
> +     /*
> +      * We may observe TASK_TOMBSTONE, which means that the event tear-down
> +      * will stop on the parent's child_mutex that our caller is also holding.
> +      */
> +     if (task == TASK_TOMBSTONE)
> +             return;
> +
> +     mm = get_task_mm(event->ctx->task);
> +     if (!mm)
> +             return;

So kernel threads, which have no mm, may not have a filter?

> +
> +     /* establish the initial hw configuration for this set of filters */
> +     perf_event_addr_filters_setup(event, NULL, 0);
> +
> +     down_read(&mm->mmap_sem);
> +
> +     raw_spin_lock_irqsave(&ifh->lock, flags);
> +     list_for_each_entry(filter, &ifh->list, entry) {
> +             offs[count] = 0;
> +
> +             if (perf_addr_filter_needs_mmap(filter)) {
> +                     offs[count] = perf_addr_filter_apply(filter, mm);
> +
> +                     if (offs[count])
> +                             restart++;
> +             }
> +
> +             count++;
> +     }
> +
> +     gen = ++event->addr_filters_gen;
> +     raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +     up_read(&mm->mmap_sem);
> +
> +     if (restart)
> +             perf_event_addr_filters_setup(event, offs, gen);
> +
> +     mmput(mm);
> +}
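
FWIW, the generation counter pattern above reduces to something like
this (names made up for illustration, not the patch's API):

	struct gen_data {
		raw_spinlock_t	lock;
		unsigned long	gen;
		unsigned long	offs[NR_FILTERS];	/* hypothetical */
	};

	/* updater: recompute the data, stamp it with a new generation */
	static unsigned long gen_data_publish(struct gen_data *gd)
	{
		unsigned long flags, gen;

		raw_spin_lock_irqsave(&gd->lock, flags);
		/* ... recompute gd->offs[] from current mappings ... */
		gen = ++gd->gen;
		raw_spin_unlock_irqrestore(&gd->lock, flags);

		return gen;
	}

	/* consumer (IPI context): apply only if the snapshot is current */
	static bool gen_data_apply(struct gen_data *gd, unsigned long gen)
	{
		bool cur;

		raw_spin_lock(&gd->lock);
		cur = (gen == gd->gen);
		if (cur) {
			/* ... program the hardware from gd->offs[] ... */
		}
		raw_spin_unlock(&gd->lock);

		return cur;
	}

That is, a stale cross-call degrades into a nop instead of clobbering
newer offsets.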

> +/*
> + * Address range filtering: limiting the data to certain
> + * instruction address ranges. Filters are ioctl()ed to us from
> + * userspace as ascii strings.
> + *
> + * Filter string format:
> + *
> + * ACTION SOURCE:RANGE_SPEC
> + * where ACTION is one of the
> + *  * "filter": limit the trace to this region
> + *  * "start": start tracing from this address
> + *  * "stop": stop tracing at this address/region;
> + * SOURCE is either "file" or "kernel"
> + * RANGE_SPEC is
> + *  * for "kernel": <start address>[/<size>]
> + *  * for "file":   <start address>[/<size>]@</path/to/object/file>
> + *
> + * if <size> is not specified, the range is treated as a single address.
> + */
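
For concreteness, strings in that format would look like the below
(addresses and path invented):

	filter kernel:0xffffffff81000000/0x4000
	start file:0x1000@/usr/lib/libfoo.so
	stop file:0x2000/0x100@/usr/lib/libfoo.so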

Scary bit of string manipulation there.. have you tried fuzzing it? ;-)
