Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters
On Wed, Mar 25, 2015 at 10:57:05PM -0700, Sukadev Bhattiprolu wrote: Would something like this work? Sure, that looks fine. Thanks. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters
Peter Zijlstra [pet...@infradead.org] wrote: | | Is there a down-side to always doing the txn based group read? If an | arch does not implement the read txn support it'll fall back to doing | independent read ops, but we end up doing those anyway. | | That way we get less special case code. We could, but would need to move the perf_event_read() earlier in the perf_event_read_group(). Can we do something like this (it could be broken into two patches, but merging for easier review) Would something like this work? perf_event_read_value() is mostly computing event count, enabled and running times. Move the perf_event_read() into caller and rename perf_event_read_value() to perf_event_compute_values(). Then, in perf_event_read_group(), read the event counts using the transaction interface for all PMUs. diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 8e6b7d8..5896cb1 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -144,9 +144,11 @@ static u64 read_pmc(struct kvm_pmc *pmc) counter = pmc->counter; - if (pmc->perf_event) - counter += perf_event_read_value(pmc->perf_event, + if (pmc->perf_event) { + perf_event_read(pmc->perf_event); + counter += perf_event_compute_values(pmc->perf_event, &enabled, &running); + } /* FIXME: Scaling needed? 
*/ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c8fe60e..1e30560 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -579,7 +579,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -extern u64 perf_event_read_value(struct perf_event *event, +extern u64 perf_event_compute_values(struct perf_event *event, u64 *enabled, u64 *running); diff --git a/kernel/events/core.c b/kernel/events/core.c index a6abcd3..f7e4705 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3643,7 +3643,27 @@ static void orphans_remove_work(struct work_struct *work) put_ctx(ctx); } -u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) +static int perf_event_read_values(struct perf_event *leader) +{ + int ret; + struct perf_event *sub; + struct pmu *pmu; + + pmu = leader->pmu; + + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(leader); + list_for_each_entry(sub, &leader->sibling_list, group_entry) + pmu->read(sub); + + ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ); + + return ret; +} + +u64 perf_event_compute_values(struct perf_event *event, u64 *enabled, + u64 *running) { struct perf_event *child; u64 total = 0; @@ -3653,7 +3673,6 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) mutex_lock(&event->child_mutex); - perf_event_read(event); total += perf_event_count(event); *enabled += event->total_time_enabled + @@ -3671,7 +3690,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) return total; } -EXPORT_SYMBOL_GPL(perf_event_read_value); +EXPORT_SYMBOL_GPL(perf_event_compute_values); static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) @@ -3684,7 +3703,11 @@ static int perf_event_read_group(struct perf_event *event, lockdep_assert_held(&ctx->mutex); - count = perf_event_read_value(leader, 
&enabled, &running); + ret = perf_event_read_values(leader); + if (ret) + return ret; + + count = perf_event_compute_values(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -3705,7 +3728,7 @@ static int perf_event_read_group(struct perf_event *event, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - values[n++] = perf_event_read_value(sub, &enabled, &running); + values[n++] = perf_event_compute_values(sub, &enabled, &running); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); @@ -3728,7 +3751,8 @@ static int perf_event_read_one(struct perf_event *event, u64 values[4]; int n = 0; - values[n++] = perf_event_read_value(event, &enabled, &running); + perf_event_read(event); + values[n++] = perf_event_compute_values(event, &enabled, &running); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) values[n++] = enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) ___ Linuxppc-dev mailing
[PATCH 4/4] perf/powerpc: Implement group_read() txn interface for 24x7 counters
The 24x7 counters in Powerpc allow monitoring a large number of counters simultaneously. They also allow reading several counters in a single HCALL so we can get a more consistent snapshot of the system. Use the PMU's transaction interface to monitor and read several event counters at once. The idea is that users can group several 24x7 events into a single group of events. We use the following logic to submit the group of events to the PMU and read the values: pmu->start_txn()// Initialize before first event for each event in group pmu->read(event); // queue the event to be read pmu->commit_txn() // Read/update all queued counters The ->commit_txn() also updates the event counts in the respective perf_event objects. The perf subsystem can then directly get the event counts from the perf_event and can avoid submitting a new ->read() request to the PMU. Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 171 include/linux/perf_event.h | 1 + kernel/events/core.c| 37 ++ 3 files changed, 209 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8c571fb..a144d67 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -166,6 +166,7 @@ struct perf_event; * pmu::capabilities flags */ #define PERF_PMU_CAP_NO_INTERRUPT 0x01 +#define PERF_PMU_CAP_GROUP_READ 0x02 /** * struct pmu - generic performance monitoring unit diff --git a/kernel/events/core.c b/kernel/events/core.c index 77ce4f3..ff62ea5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3677,11 +3677,34 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running, } EXPORT_SYMBOL_GPL(perf_event_read_value); +static int do_pmu_group_read(struct perf_event *leader) +{ + int ret; + struct pmu *pmu; + struct perf_event *sub; + + pmu = leader->pmu; + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(leader); + list_for_each_entry(sub, &leader->sibling_list, group_entry) + pmu->read(sub); + + /* +* 
Commit_txn submits the transaction to read all the counters +* in the group _and_ updates the event count. +*/ + ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ); + + return ret; +} + static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; struct perf_event_context *ctx = leader->ctx; + struct pmu *pmu; int n = 0, size = 0, ret; u64 count, enabled, running; u64 values[5]; @@ -3690,7 +3713,21 @@ static int perf_event_read_group(struct perf_event *event, lockdep_assert_held(&ctx->mutex); + pmu = event->pmu; update = 1; + + if ((read_format & PERF_FORMAT_GROUP) && + (pmu->capabilities & PERF_PMU_CAP_GROUP_READ)) { + ret = do_pmu_group_read(event); + if (ret) + return ret; + /* +* ->commit_txn() would have updated the event count, +* so we don't have to consult the PMU again. +*/ + update = 0; + } + count = perf_event_read_value(leader, &enabled, &running, update); values[n++] = 1 + leader->nr_siblings; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 66fa6c8..08c69c1 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,13 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +struct h_24x7_hw { + int flags; + int in_txn; + int txn_err; + struct perf_event *events[255]; +}; + /* * request_buffer and result_buffer are not required to be 4k aligned, * but are not allowed to cross any 4k boundary. 
Aligning them to 4k is @@ -150,6 +157,7 @@ static struct kmem_cache *hv_page_cache; #define H24x7_DATA_BUFFER_SIZE 4096 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +DEFINE_PER_CPU(struct h_24x7_hw, h_24x7_hw); static char *event_name(struct hv_24x7_event_data *ev, int *len) { @@ -1210,10 +1218,46 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { + int ret; u64 now; + struct h_24x7_hw *h24x7hw; + struct hv_24x7_request_buffer *request_buffer; + + /* +* If in a READ transaction, add this counter to the list of +* counters to read during the next HCALL (i.e. commit_txn()). +* If not in a READ transaction, go ahead and