Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT
On Tue, Jan 26, 2021 at 10:33:18AM -0500, Liang, Kan wrote: > > > On 1/26/2021 9:42 AM, Peter Zijlstra wrote: > > On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote: > > > > > @@ -900,6 +901,13 @@ enum perf_event_type { > > >*char data[size]; } && PERF_SAMPLE_AUX > > >* { u64 data_page_size;} && > > > PERF_SAMPLE_DATA_PAGE_SIZE > > >* { u64 code_page_size;} && > > > PERF_SAMPLE_CODE_PAGE_SIZE > > > + * { union { > > > + * u64 weight_ext; > > > + * struct { > > > + * u64 instr_latency:16, > > > + * reserved:48; > > > + * }; > > > + * } && PERF_SAMPLE_WEIGHT_EXT > > >* }; > > >*/ > > > PERF_RECORD_SAMPLE = 9, > > > @@ -1248,4 +1256,12 @@ struct perf_branch_entry { > > > reserved:40; > > > }; > > > +union perf_weight_ext { > > > + __u64 val; > > > + struct { > > > + __u64 instr_latency:16, > > > + reserved:48; > > > + }; > > > +}; > > > + > > > #endif /* _UAPI_LINUX_PERF_EVENT_H */ > > > diff --git a/kernel/events/core.c b/kernel/events/core.c > > > index 55d1879..9363d12 100644 > > > --- a/kernel/events/core.c > > > +++ b/kernel/events/core.c > > > @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct > > > perf_event *event, u64 sample_type) > > > if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) > > > size += sizeof(data->code_page_size); > > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) > > > + size += sizeof(data->weight_ext); > > > + > > > event->header_size = size; > > > } > > > @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle > > > *handle, > > > perf_aux_sample_output(event, handle, data); > > > } > > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) > > > + perf_output_put(handle, data->weight_ext); > > > + > > > if (!event->attr.watermark) { > > > int wakeup_events = event->attr.wakeup_events; > > > > This patch is broken and will expose uninitialized kernel stack. > > > > Could we initialize the 'weight_ext' in perf_sample_data_init()? No. Also see my other mail for why I hate this thing.
Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT
On 1/26/2021 9:42 AM, Peter Zijlstra wrote: On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote: @@ -900,6 +901,13 @@ enum perf_event_type { *char data[size]; } && PERF_SAMPLE_AUX * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE +* { union { +* u64 weight_ext; +* struct { +* u64 instr_latency:16, +* reserved:48; +* }; +* } && PERF_SAMPLE_WEIGHT_EXT * }; */ PERF_RECORD_SAMPLE = 9, @@ -1248,4 +1256,12 @@ struct perf_branch_entry { reserved:40; }; +union perf_weight_ext { + __u64 val; + struct { + __u64 instr_latency:16, + reserved:48; + }; +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 55d1879..9363d12 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) size += sizeof(data->code_page_size); + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) + size += sizeof(data->weight_ext); + event->header_size = size; } @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle *handle, perf_aux_sample_output(event, handle, data); } + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) + perf_output_put(handle, data->weight_ext); + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; This patch is broken and will expose uninitialized kernel stack. Could we initialize the 'weight_ext' in perf_sample_data_init()? I understand that we prefer not to set the field in perf_sample_data_init() to minimize the cachelines touched. However, the perf_sample_data_init() should be the most proper place to do the initialization. Also, the 'weight' is already initialized in it. As an extension, I think the 'weight_ext' should be initialized in it as well. In the perf_prepare_sample(), I think we can only clear the unused fields. The [0:15] bits may still leak the data. Thanks, Kan
Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT
On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote: > @@ -900,6 +901,13 @@ enum perf_event_type { >*char data[size]; } && PERF_SAMPLE_AUX >* { u64 data_page_size;} && > PERF_SAMPLE_DATA_PAGE_SIZE >* { u64 code_page_size;} && > PERF_SAMPLE_CODE_PAGE_SIZE > + * { union { > + * u64 weight_ext; > + * struct { > + * u64 instr_latency:16, > + * reserved:48; > + * }; > + * } && PERF_SAMPLE_WEIGHT_EXT >* }; >*/ > PERF_RECORD_SAMPLE = 9, > @@ -1248,4 +1256,12 @@ struct perf_branch_entry { > reserved:40; > }; > > +union perf_weight_ext { > + __u64 val; > + struct { > + __u64 instr_latency:16, > + reserved:48; > + }; > +}; > + > #endif /* _UAPI_LINUX_PERF_EVENT_H */ > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 55d1879..9363d12 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event > *event, u64 sample_type) > if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) > size += sizeof(data->code_page_size); > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) > + size += sizeof(data->weight_ext); > + > event->header_size = size; > } > > @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle > *handle, > perf_aux_sample_output(event, handle, data); > } > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) > + perf_output_put(handle, data->weight_ext); > + > if (!event->attr.watermark) { > int wakeup_events = event->attr.wakeup_events; > This patch is broken and will expose uninitialized kernel stack.
[PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT
From: Kan Liang Current PERF_SAMPLE_WEIGHT sample type is very useful to expresses the cost of an action represented by the sample. This allows the profiler to scale the samples to be more informative to the programmer. It could also help to locate a hotspot, e.g., when profiling by memory latencies, the expensive load appear higher up in the histograms. But current PERF_SAMPLE_WEIGHT sample type is solely determined by one factor. This could be a problem, if users want two or more factors to contribute to the weight. For example, Golden Cove core PMU can provide both the instruction latency and the cache Latency information as factors for the memory profiling. Add a new sample type, PERF_SAMPLE_WEIGHT_EXT, as an extension of the PERF_SAMPLE_WEIGHT sample type. The first 16-bit is used as the weight value for the instruction latency, defined by the delay measured between the dispatch of an instruction for execution and its completion. This is quite generic and can be extended to other kinds of architectures, as long as the hardware provides suitable values. Other fields are reserved for future usage. Signed-off-by: Kan Liang --- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 18 +- kernel/events/core.c| 6 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9a38f57..005b6b8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1030,6 +1030,7 @@ struct perf_sample_data { u64 cgroup; u64 data_page_size; u64 code_page_size; + union perf_weight_ext weight_ext; } cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b15e344..d0129e5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -145,8 +145,9 @@ enum perf_event_sample_format { PERF_SAMPLE_CGROUP = 1U << 21, PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_EXT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -900,6 +901,13 @@ enum perf_event_type { *char data[size]; } && PERF_SAMPLE_AUX * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE +* { union { +* u64 weight_ext; +* struct { +* u64 instr_latency:16, +* reserved:48; +* }; +* } && PERF_SAMPLE_WEIGHT_EXT * }; */ PERF_RECORD_SAMPLE = 9, @@ -1248,4 +1256,12 @@ struct perf_branch_entry { reserved:40; }; +union perf_weight_ext { + __u64 val; + struct { + __u64 instr_latency:16, + reserved:48; + }; +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 55d1879..9363d12 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) size += sizeof(data->code_page_size); + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) + size += sizeof(data->weight_ext); + event->header_size = size; } @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle *handle, perf_aux_sample_output(event, handle, data); } + if (sample_type & PERF_SAMPLE_WEIGHT_EXT) + perf_output_put(handle, data->weight_ext); + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; -- 2.7.4