Currently, there is no mechanism to filter events based on containers.
perf -G can be used, but it will not filter events for the containers
created after perf is invoked, making it difficult to assess/analyze
performance issues of multiple containers at once. This limitation can
be overcome if there is a standard kernel identifier for containers.

This patch introduces a container identifier entry field in perf sample
data to identify or distinguish sample data of different containers. It
uses the cgroup namespace inode number of a given task as its container
identifier (cid). Alternatively, the inode number of the pid namespace
can also be used as the cid. This patch assumes each container is created
with its own cgroup namespace.

Suggested-by: Ananth N Mavinakayanahalli <ana...@linux.vnet.ibm.com>
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---

Changes from v1:
  1. Updated PERF_RECORD_SAMPLE comment.
  2. Fixed compile issue with CONFIG_CGROUPS=n

Will post the manpage update once this gets in.


 include/linux/perf_event.h      |    4 ++++
 include/uapi/linux/perf_event.h |    4 +++-
 kernel/events/core.c            |   23 +++++++++++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43c..4d553ee 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -908,6 +908,10 @@ struct perf_sample_data {
 
        struct perf_regs                regs_intr;
        u64                             stack_user_size;
+       struct {
+               u32     cid;
+               u32     reserved;
+       }                               cid_entry;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..826b799 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
        PERF_SAMPLE_IDENTIFIER                  = 1U << 16,
        PERF_SAMPLE_TRANSACTION                 = 1U << 17,
        PERF_SAMPLE_REGS_INTR                   = 1U << 18,
+       PERF_SAMPLE_CID                         = 1U << 19,
 
-       PERF_SAMPLE_MAX = 1U << 19,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 20,             /* non-ABI */
 };
 
 /*
@@ -773,6 +774,7 @@ enum perf_event_type {
         *      { u64                   transaction; } && PERF_SAMPLE_TRANSACTION
         *      { u64                   abi; # enum perf_sample_regs_abi
         *        u64                   regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+        *      { u32                   cid, res; } && PERF_SAMPLE_CID
         * };
         */
        PERF_RECORD_SAMPLE                      = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf..465febd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5776,6 +5776,9 @@ void perf_output_sample(struct perf_output_handle *handle,
                }
        }
 
+       if (sample_type & PERF_SAMPLE_CID)
+               perf_output_put(handle, data->cid_entry);
+
        if (!event->attr.watermark) {
                int wakeup_events = event->attr.wakeup_events;
 
@@ -5909,6 +5912,26 @@ void perf_prepare_sample(struct perf_event_header *header,
 
                header->size += size;
        }
+
+       if (sample_type & PERF_SAMPLE_CID) {
+               int size = sizeof(u64);
+
+               /* Container identifier for a given task */
+#ifdef CONFIG_CGROUPS
+               /*
+                * Use the task's cgroup namespace inode number.
+                */
+               data->cid_entry.cid = current->nsproxy->cgroup_ns->ns.inum;
+#else
+               /*
+                * If cgroup namespace is not enabled,
+                * all tasks have the same cid.
+                */
+               data->cid_entry.cid = 0xffffffffUL;
+#endif
+               data->cid_entry.reserved = 0;
+               header->size += size;
+       }
 }
 
 static void __always_inline

Reply via email to