---
 include/linux/perf_event.h |  34 +++-
 kernel/events/core.c       | 386 +++++++++++++++++++++++++++++++++------------
 2 files changed, 315 insertions(+), 105 deletions(-)
<changelog goes here>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a6358..2c1dcf1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -574,6 +574,27 @@ struct perf_event {
 	struct list_head		sibling_list;
+	/*
+	 * Node on the pinned or flexible tree located at the event context;
+	 * the node may be empty in case its event is not directly attached
+	 * to the tree but to group_list list of the event directly
+	 * attached to the tree;
+	 */
+	struct rb_node			group_node;
+	/*
+	 * List keeps groups allocated for the same cpu;
+	 * the list may be empty in case its event is not directly
+	 * attached to the tree but to group_list list of the event
+	 * directly attached to the tree;
+	 */
+	struct list_head		group_list;
+	/*
+	 * Entry into the group_list list above;
+	 * the entry may be attached to the self group_list list above
+	 * in case the event is directly attached to the pinned or
+	 * flexible tree;
+	 */
+	struct list_head		group_list_entry;
We already have group_entry for threading the RB tree; I don't see why
we need two new list heads.
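i.e. (sketch only, untested) a tree node plus the existing list head
ought to be enough:

	/* Linkage in the pinned/flexible tree (group leaders only). */
	struct rb_node			group_node;
	/* Existing member; would now thread all leaders in tree order. */
	struct list_head		group_entry;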
+
 	/*
 	 * We need storage to track the entries in perf_pmu_migrate_context; we
 	 * cannot use the event_entry because of RCU and we want to keep the
 	 * group in tact which avoids us using the other two entries.
@@ -741,8 +762,17 @@ struct perf_event_context {
 	struct mutex			mutex;
 	struct list_head		active_ctx_list;
-	struct list_head		pinned_groups;
-	struct list_head		flexible_groups;
+	/*
+	 * RB tree for pinned groups; keeps event's group_node
+	 * nodes of attached pinned groups;
+	 */
+	struct rb_root			pinned_tree;
+	/*
+	 * RB tree for flexible groups; keeps event's group_node
+	 * nodes of attached flexible groups;
+	 */
+	struct rb_root			flexible_tree;
We didn't need these comments before, and I don't think we need them
now.

Any reason not to stick with the existing names? The existing
{pinned,flexible}_groups names are fine regardless of whether a list or
tree is used, and they make it clear that the elements are the group
leaders, not all events.
+
 	struct list_head		event_list;
 	int				nr_events;
 	int				nr_active;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc63f8d..a3531f9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1458,13 +1458,142 @@ static enum event_type_t get_event_type(struct perf_event *event)
 	return event_type;
 }
-static struct list_head *
-ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+static struct rb_root *
+ctx_group_cpu_tree(struct perf_event *event, struct perf_event_context *ctx)
 {
 	if (event->attr.pinned)
-		return &ctx->pinned_groups;
+		return &ctx->pinned_tree;
 	else
-		return &ctx->flexible_groups;
+		return &ctx->flexible_tree;
+}
+
+/*
+ * Insert a group into a tree using event->cpu as a key. If event->cpu node
+ * is already attached to the tree then the event is added to the attached
+ * group's group_list list.
+ */
+static void
+perf_cpu_tree_insert(struct rb_root *tree, struct perf_event *event)
+{
+	struct rb_node **node;
+	struct rb_node *parent;
+
+	WARN_ON_ONCE(!tree || !event);
+
+	node = &tree->rb_node;
+	parent = *node;
The first iteration of the loop handles this, so it can go.
+
+	while (*node) {
+		struct perf_event *node_event = container_of(*node,
+				struct perf_event, group_node);
+
+		parent = *node;
+
+		if (event->cpu < node_event->cpu) {
+			node = &parent->rb_left;
+		} else if (event->cpu > node_event->cpu) {
+			node = &parent->rb_right;
+		} else {
+			list_add_tail(&event->group_list_entry,
+					&node_event->group_list);
+			return;
+		}
+	}
+
+	list_add_tail(&event->group_list_entry, &event->group_list);
+
+	rb_link_node(&event->group_node, parent, node);
+	rb_insert_color(&event->group_node, tree);
+}

My understanding was that every group leader should be in the rb tree,
and every group leader should be in the same list that threads the whole
tree. Both the rb tree and the list have to be maintained in the same
order.

Thus, you can walk the rb tree to find the first event in the sub-list
that you care about, then walk that sub-list.

Note that this means we need to shuffle the list and rb tree to rotate
events in each sub-tree.
+
+/*
+ * Delete a group from a tree. If the group is directly attached to the tree
+ * it also detaches all groups on the group's group_list list.
+ */
+static void
+perf_cpu_tree_delete(struct rb_root *tree, struct perf_event *event)
+{
+	WARN_ON_ONCE(!tree || !event);
+
+	if (RB_EMPTY_NODE(&event->group_node)) {
+		list_del_init(&event->group_list_entry);
+	} else {
+		struct perf_event *list_event, *list_next;
+
+		rb_erase(&event->group_node, tree);
+		list_for_each_entry_safe(list_event, list_next,
+				&event->group_list, group_list_entry)
+			list_del_init(&list_event->group_list_entry);
At this point, all the other group leaders with the same filter are gone
from the rb tree, since we didn't insert any of them into the rb tree in
place of the leader we deleted.

+	}
+}
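With the current scheme, one way to keep the remaining groups reachable
would be to promote the next group on the sub-list into the vacated
tree position, e.g. (untested, and only one option):

static void
perf_cpu_tree_delete(struct rb_root *tree, struct perf_event *event)
{
	if (RB_EMPTY_NODE(&event->group_node)) {
		/* On another leader's sub-list: just unlink. */
		list_del_init(&event->group_list_entry);
		return;
	}

	/* A tree-resident leader sits on its own sub-list; drop it. */
	list_del_init(&event->group_list_entry);

	if (list_empty(&event->group_list)) {
		/* Last group for this cpu; the node can simply go. */
		rb_erase(&event->group_node, tree);
	} else {
		/* Promote the next group into the tree node we vacate. */
		struct perf_event *next;

		next = list_first_entry(&event->group_list,
				struct perf_event, group_list_entry);
		list_replace_init(&event->group_list, &next->group_list);
		rb_replace_node(&event->group_node, &next->group_node, tree);
	}
	RB_CLEAR_NODE(&event->group_node);
}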
+
+/*
+ * Find group_list list by a cpu key and call provided callback for every
+ * group on the list. Iteration stops if the callback returns non zero.
+ */
+
+typedef int (*perf_cpu_tree_callback_t)(struct perf_event *, void *);
+
+static int
+perf_cpu_tree_iterate_cpu(struct rb_root *tree, int cpu,
+		perf_cpu_tree_callback_t callback, void *data)
+{
+ int ret = 0;
+ struct rb_node *node;
+ struct perf_event *event;
+
+ WARN_ON_ONCE(!tree);
+
+ node = tree->rb_node;
+
+ while (node) {
+ struct perf_event *node_event = container_of(node,
+ struct perf_event, group_node);
+
+ if (cpu < node_event->cpu) {
+ node = node->rb_left;
+ } else if (cpu > node_event->cpu) {
+ node = node->rb_right;
+ } else {
+ list_for_each_entry(event, &node_event->group_list,
+ group_list_entry) {
+ ret = callback(event, data);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Iterate a tree and call provided callback for every group in the tree.
+ * Iteration stops if the callback returns non zero.
+ */
+static int
+perf_cpu_tree_iterate(struct rb_root *tree,
+		perf_cpu_tree_callback_t callback, void *data)
+{
+	int ret = 0;
+	struct rb_node *node;
+	struct perf_event *event;
+
+	WARN_ON_ONCE(!tree);
+
+	for (node = rb_first(tree); node; node = rb_next(node)) {
+		struct perf_event *node_event = container_of(node,
+				struct perf_event, group_node);
+
+		list_for_each_entry(event, &node_event->group_list,
+				group_list_entry) {
+			ret = callback(event, data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
If you need to iterate over every event, you can use the list that
threads the whole tree.
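i.e. with the threading list this whole function reduces to something
like (untested; assumes the list head lives in the context):

static int
perf_cpu_tree_iterate(struct list_head *groups,
		perf_cpu_tree_callback_t callback, void *data)
{
	struct perf_event *event;
	int ret;

	list_for_each_entry(event, groups, group_entry) {
		ret = callback(event, data);
		if (ret)
			return ret;
	}

	return 0;
}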
@@ -1485,12 +1614,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	/*
 	 * perf_group_detach can, at all times, locate all siblings.
 	 */
 	if (event->group_leader == event) {
-		struct list_head *list;
+		struct rb_root *tree;
 
 		event->group_caps = event->event_caps;
 
-		list = ctx_group_list(event, ctx);
-		list_add_tail(&event->group_entry, list);
+		tree = ctx_group_cpu_tree(event, ctx);
+		perf_cpu_tree_insert(tree, event);
Can we wrap this in a helper that finds the sub-tree and performs the
insert?
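e.g. (helper name made up):

static void add_event_to_groups(struct perf_event *event,
				struct perf_event_context *ctx)
{
	struct rb_root *tree = ctx_group_cpu_tree(event, ctx);

	perf_cpu_tree_insert(tree, event);
}

... so callers don't need to care about the tree lookup.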