Re: [PATCH V2 2/2] bpf: control a set of perf events by creating a new ioctl PERF_EVENT_IOC_SET_ENABLER

2015-10-14 Thread Alexei Starovoitov

On 10/14/15 5:37 AM, Kaixu Xia wrote:

+   event->p_sample_disable = &enabler_event->sample_disable;


I don't like it as a concept and it's a buggy implementation.
What happens here when enabler is alive, but other event is destroyed?
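For reference, here are the two halves of the coupling the question is about,
condensed from the patch below; no reference is taken on either event, so
nothing ties their lifetimes together:

	/* PERF_EVENT_IOC_SET_ENABLER: store a bare pointer into the enabler */
	event->p_sample_disable = &enabler_event->sample_disable;

	/* __perf_event_overflow(): dereferenced on every overflow */
	if (!atomic_read(event->p_sample_disable))
		return ret;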


--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -221,9 +221,12 @@ static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, u6
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct perf_event *event;

-   if (unlikely(index >= array->map.max_entries))
+   if (unlikely(index > array->map.max_entries))
return -E2BIG;

+   if (index == array->map.max_entries)
+   index = 0;


what is this hack for ?

Either use notification and user space disable or
call bpf_perf_event_sample_control() manually for each cpu.
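
A minimal sketch of the first alternative (notification plus user-space
disable), assuming user space already holds one event fd per CPU; it only
relies on the existing PERF_EVENT_IOC_DISABLE ioctl, the array name and
nr_cpus are illustrative:

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* called when the BPF program signals that sampling should stop */
	static void disable_all_counters(int *event_fds, int nr_cpus)
	{
		int cpu;

		for (cpu = 0; cpu < nr_cpus; cpu++)
			ioctl(event_fds[cpu], PERF_EVENT_IOC_DISABLE, 0);
	}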




[PATCH V2 2/2] bpf: control a set of perf events by creating a new ioctl PERF_EVENT_IOC_SET_ENABLER

2015-10-14 Thread Kaixu Xia
This patch creates a new ioctl, PERF_EVENT_IOC_SET_ENABLER, that lets perf
select an event as the 'enabler'. This 'enabler' event can then be used to
enable/disable a set of events. The event on CPU 0 is treated as the
'enabler' event by default.
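
A hedged user-space sketch of how the proposed ioctl would be driven
(PERF_EVENT_IOC_SET_ENABLER only exists with this patch applied; the
perf_fds[] array and nr_cpus are illustrative, not part of the patch):

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* attach every per-CPU event to the CPU 0 event, the default 'enabler' */
	static void set_enabler_for_all(int *perf_fds, int nr_cpus)
	{
		int cpu;

		for (cpu = 1; cpu < nr_cpus; cpu++)
			ioctl(perf_fds[cpu], PERF_EVENT_IOC_SET_ENABLER, perf_fds[0]);
	}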

Signed-off-by: Kaixu Xia 
---
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 42 +++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/bpf_trace.c        |  5 ++++-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dcbf7d5..bc9fe77 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -473,6 +473,7 @@ struct perf_event {
 
	atomic_t		event_limit;
	atomic_t		sample_disable;
+	atomic_t		*p_sample_disable;
 
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index a2b9dd7..3b4fb90 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -393,6 +393,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER  _IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID  _IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_SET_ENABLER _IO ('$', 9)
 
 enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 942351c..03d2594 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4152,6 +4152,7 @@ static int perf_event_set_output(struct perf_event *event,
 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 enabler_fd);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
@@ -4208,6 +4209,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);
 
+   case PERF_EVENT_IOC_SET_ENABLER:
+   return perf_event_set_sample_enabler(event, arg);
+
default:
return -ENOTTY;
}
@@ -6337,7 +6341,7 @@ static int __perf_event_overflow(struct perf_event *event,
		irq_work_queue(&event->pending);
}
 
-	if (!atomic_read(&event->sample_disable))
+   if (!atomic_read(event->p_sample_disable))
return ret;
 
if (event->overflow_handler)
@@ -6989,6 +6993,35 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return 0;
 }
 
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 enabler_fd)
+{
+   int ret;
+   struct fd enabler;
+   struct perf_event *enabler_event;
+
+   if (enabler_fd == -1)
+   return 0;
+
+	ret = perf_fget_light(enabler_fd, &enabler);
+   if (ret)
+   return ret;
+   enabler_event = enabler.file->private_data;
+   if (event == enabler_event) {
+   fdput(enabler);
+   return 0;
+   }
+
+	/* they must be on the same PMU */
+   if (event->pmu != enabler_event->pmu) {
+   fdput(enabler);
+   return -EINVAL;
+   }
+
+	event->p_sample_disable = &enabler_event->sample_disable;
+   fdput(enabler);
+   return 0;
+}
+
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
struct bpf_prog *prog;
@@ -7023,6 +7056,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return -ENOENT;
 }
 
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 group_fd)
+{
+   return -ENOENT;
+}
+
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
 }
@@ -7718,6 +7756,8 @@ static void perf_event_check_sample_flag(struct perf_event *event)
		atomic_set(&event->sample_disable, 0);
	else
		atomic_set(&event->sample_disable, 1);
+
+	event->p_sample_disable = &event->sample_disable;
 }
 
 /*
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f261333..d012be3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -221,9 +221,12 @@ static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, u6
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct perf_event *event;
 
-   if (unlikely(index >= array->map.max_entries))
+   if (unlikely(index > array->map.max_entries))
return -E2BIG;
 
+   if (index == array->map.max_entries)
+   index = 0;
+
  

Re: [PATCH V2 2/2] bpf: control a set of perf events by creating a new ioctl PERF_EVENT_IOC_SET_ENABLER

2015-10-14 Thread xiakaixu
On 2015/10/15 5:28, Alexei Starovoitov wrote:
> On 10/14/15 5:37 AM, Kaixu Xia wrote:
>> +	event->p_sample_disable = &enabler_event->sample_disable;
> 
> I don't like it as a concept and it's a buggy implementation.
> What happens here when enabler is alive, but other event is destroyed?
> 
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -221,9 +221,12 @@ static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, u6
>>   struct bpf_array *array = container_of(map, struct bpf_array, map);
>>   struct perf_event *event;
>>
>> -if (unlikely(index >= array->map.max_entries))
>> +if (unlikely(index > array->map.max_entries))
>>   return -E2BIG;
>>
>> +if (index == array->map.max_entries)
>> +index = 0;
> 
> what is this hack for ?
> 
> Either use notification and user space disable or
> call bpf_perf_event_sample_control() manually for each cpu.

I will discard the current implementation that controls a set of perf
events through the 'enabler' event. Calling bpf_perf_event_sample_control()
manually for each CPU is fine. Maybe we can add a loop that controls all
the events stored in the map by checking the index, OK?
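
A rough fragment of what that loop could look like at the top of
bpf_perf_event_sample_control(), reusing the array/event/index/flag
variables already in that function. It assumes one reserved index value
(array->map.max_entries here) means "apply to every slot", that the slots
hold struct perf_event pointers as bpf_perf_event_read() reads them, and
that flag simply selects enable (non-zero) vs. disable (zero); the real
flag semantics live in patch 1/2, which is not quoted here:

	if (index == array->map.max_entries) {
		int i;

		for (i = 0; i < array->map.max_entries; i++) {
			event = (struct perf_event *)array->ptrs[i];
			if (!event)
				continue;
			atomic_set(&event->sample_disable, flag ? 1 : 0);
		}
		return 0;
	}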

