Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters

2015-04-01 Thread Peter Zijlstra
On Wed, Mar 25, 2015 at 10:57:05PM -0700, Sukadev Bhattiprolu wrote:
 Would something like this work?
 

Sure, that looks fine.

Thanks.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters

2015-03-25 Thread Sukadev Bhattiprolu
Peter Zijlstra [pet...@infradead.org] wrote:
| 
| Is there a down-side to always doing the txn based group read? If an
| arch does not implement the read txn support it'll fall back to doing
| independent read ops, but we end up doing those anyway.
| 
| That way we get less special case code.

We could, but would need to move the perf_event_read() earlier in
the perf_event_read_group(). Can we do something like this (it
could be broken into two patches, but merging for easier review)
Would something like this work?



perf_event_read_value() is mostly computing event count, enabled
and running times. Move the perf_event_read() into caller and
rename perf_event_read_value() to perf_event_compute_values().

Then, in perf_event_read_group(), read the event counts using the
transaction interface for all PMUs.



diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d8..5896cb1 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -144,9 +144,11 @@ static u64 read_pmc(struct kvm_pmc *pmc)
 
counter = pmc->counter;
 
-   if (pmc->perf_event)
-   counter += perf_event_read_value(pmc->perf_event,
+   if (pmc->perf_event) {
+   perf_event_read(pmc->perf_event);
+   counter += perf_event_compute_values(pmc->perf_event,
 &enabled, &running);
+   }
 
/* FIXME: Scaling needed? */
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8fe60e..1e30560 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -579,7 +579,7 @@ perf_event_create_kernel_counter(struct perf_event_attr 
*attr,
void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
int src_cpu, int dst_cpu);
-extern u64 perf_event_read_value(struct perf_event *event,
+extern u64 perf_event_compute_values(struct perf_event *event,
 u64 *enabled, u64 *running);
 
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a6abcd3..f7e4705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3643,7 +3643,27 @@ static void orphans_remove_work(struct work_struct *work)
put_ctx(ctx);
 }
 
-u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+static int perf_event_read_values(struct perf_event *leader)
+{
+   int ret;
+   struct perf_event *sub;
+   struct pmu *pmu;
+
+   pmu = leader->pmu;
+
+   pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+   pmu->read(leader);
+   list_for_each_entry(sub, &leader->sibling_list, group_entry)
+   pmu->read(sub);
+
+   ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ);
+
+   return ret;
+}
+
+u64 perf_event_compute_values(struct perf_event *event, u64 *enabled,
+   u64 *running)
 {
struct perf_event *child;
u64 total = 0;
@@ -3653,7 +3673,6 @@ u64 perf_event_read_value(struct perf_event *event, u64 
*enabled, u64 *running)
 
mutex_lock(&event->child_mutex);
 
-   perf_event_read(event);
total += perf_event_count(event);
 
*enabled += event->total_time_enabled +
@@ -3671,7 +3690,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 
*enabled, u64 *running)
 
return total;
 }
-EXPORT_SYMBOL_GPL(perf_event_read_value);
+EXPORT_SYMBOL_GPL(perf_event_compute_values);
 
 static int perf_event_read_group(struct perf_event *event,
   u64 read_format, char __user *buf)
@@ -3684,7 +3703,11 @@ static int perf_event_read_group(struct perf_event 
*event,
 
lockdep_assert_held(&ctx->mutex);
 
-   count = perf_event_read_value(leader, &enabled, &running);
+   ret = perf_event_read_values(leader);
+   if (ret)
+   return ret;
+
+   count = perf_event_compute_values(leader, &enabled, &running);
 
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -3705,7 +3728,7 @@ static int perf_event_read_group(struct perf_event *event,
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
n = 0;
 
-   values[n++] = perf_event_read_value(sub, &enabled, &running);
+   values[n++] = perf_event_compute_values(sub, &enabled, 
&running);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
 
@@ -3728,7 +3751,8 @@ static int perf_event_read_one(struct perf_event *event,
u64 values[4];
int n = 0;
 
-   values[n++] = perf_event_read_value(event, &enabled, &running);
+   perf_event_read(event);
+   values[n++] = perf_event_compute_values(event, &enabled, &running);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)

___
Linuxppc-dev mailing 

[PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters

2015-03-04 Thread Sukadev Bhattiprolu
The 24x7 counters in Powerpc allow monitoring a large number of counters
simultaneously. They also allow reading several counters in a single
HCALL so we can get a more consistent snapshot of the system.

Use the PMU's transaction interface to monitor and read several event
counters at once. The idea is that users can group several 24x7 events
into a single group of events. We use the following logic to submit
the group of events to the PMU and read the values:

pmu->start_txn()// Initialize before first event
 
for each event in group
pmu->read(event);   // queue the event to be read
 
pmu->commit_txn()   // Read/update all queued counters
 
The ->commit_txn() also updates the event counts in the respective
perf_event objects.  The perf subsystem can then directly get the
event counts from the perf_event and can avoid submitting a new
->read() request to the PMU.

Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com
---
 arch/powerpc/perf/hv-24x7.c | 171 
 include/linux/perf_event.h  |   1 +
 kernel/events/core.c|  37 ++
 3 files changed, 209 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8c571fb..a144d67 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -166,6 +166,7 @@ struct perf_event;
  * pmu::capabilities flags
  */
 #define PERF_PMU_CAP_NO_INTERRUPT  0x01
+#define PERF_PMU_CAP_GROUP_READ0x02
 
 /**
  * struct pmu - generic performance monitoring unit
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77ce4f3..ff62ea5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3677,11 +3677,34 @@ u64 perf_event_read_value(struct perf_event *event, u64 
*enabled, u64 *running,
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
+static int do_pmu_group_read(struct perf_event *leader)
+{
+   int ret;
+   struct pmu *pmu;
+   struct perf_event *sub;
+
+   pmu = leader->pmu;
+   pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+   pmu->read(leader);
+   list_for_each_entry(sub, &leader->sibling_list, group_entry)
+   pmu->read(sub);
+
+   /*
+* Commit_txn submits the transaction to read all the counters
+* in the group _and_ updates the event count.
+*/
+   ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ);
+
+   return ret;
+}
+
 static int perf_event_read_group(struct perf_event *event,
   u64 read_format, char __user *buf)
 {
struct perf_event *leader = event->group_leader, *sub;
struct perf_event_context *ctx = leader->ctx;
+   struct pmu *pmu;
int n = 0, size = 0, ret;
u64 count, enabled, running;
u64 values[5];
@@ -3690,7 +3713,21 @@ static int perf_event_read_group(struct perf_event 
*event,
 
lockdep_assert_held(&ctx->mutex);
 
+   pmu = event->pmu;
update = 1;
+
+   if ((read_format & PERF_FORMAT_GROUP) &&
+   (pmu->capabilities & PERF_PMU_CAP_GROUP_READ)) {
+   ret = do_pmu_group_read(event);
+   if (ret)
+   return ret;
+   /*
+* ->commit_txn() would have updated the event count,
+* so we don't have to consult the PMU again.
+*/
+   update = 0;
+   }
+
count = perf_event_read_value(leader, &enabled, &running, update);
 
values[n++] = 1 + leader->nr_siblings;
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 66fa6c8..08c69c1 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -142,6 +142,13 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
+struct h_24x7_hw {
+   int flags;
+   int in_txn;
+   int txn_err;
+   struct perf_event *events[255];
+};
+
 /*
  * request_buffer and result_buffer are not required to be 4k aligned,
  * but are not allowed to cross any 4k boundary. Aligning them to 4k is
@@ -150,6 +157,7 @@ static struct kmem_cache *hv_page_cache;
 #define H24x7_DATA_BUFFER_SIZE 4096
 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
 DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+DEFINE_PER_CPU(struct h_24x7_hw, h_24x7_hw);
 
 static char *event_name(struct hv_24x7_event_data *ev, int *len)
 {
@@ -1210,10 +1218,46 @@ static void update_event_count(struct perf_event 
*event, u64 now)
 
 static void h_24x7_event_read(struct perf_event *event)
 {
+   int ret;
u64 now;
 
+   struct h_24x7_hw *h24x7hw;
+   struct hv_24x7_request_buffer *request_buffer;
+
+   /*
+* If in a READ transaction, add this counter to the list of
+* counters to read during the next HCALL (i.e commit_txn()).
+* If not in a READ transaction, go ahead and