Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT

2021-01-26 Thread Peter Zijlstra
On Tue, Jan 26, 2021 at 10:33:18AM -0500, Liang, Kan wrote:
> 
> 
> On 1/26/2021 9:42 AM, Peter Zijlstra wrote:
> > On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote:
> > 
> > > @@ -900,6 +901,13 @@ enum perf_event_type {
> > >*char  data[size]; } && PERF_SAMPLE_AUX
> > >*  { u64   data_page_size;} && 
> > > PERF_SAMPLE_DATA_PAGE_SIZE
> > >*  { u64   code_page_size;} && 
> > > PERF_SAMPLE_CODE_PAGE_SIZE
> > > +  *  { union {
> > > +  *  u64 weight_ext;
> > > +  *  struct {
> > > +  *  u64 instr_latency:16,
> > > +  *  reserved:48;
> > > +  *  };
> > > +  *  } && PERF_SAMPLE_WEIGHT_EXT
> > >* };
> > >*/
> > >   PERF_RECORD_SAMPLE  = 9,
> > > @@ -1248,4 +1256,12 @@ struct perf_branch_entry {
> > >   reserved:40;
> > >   };
> > > +union perf_weight_ext {
> > > + __u64   val;
> > > + struct {
> > > + __u64   instr_latency:16,
> > > + reserved:48;
> > > + };
> > > +};
> > > +
> > >   #endif /* _UAPI_LINUX_PERF_EVENT_H */
> > > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > > index 55d1879..9363d12 100644
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct 
> > > perf_event *event, u64 sample_type)
> > >   if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
> > >   size += sizeof(data->code_page_size);
> > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
> > > + size += sizeof(data->weight_ext);
> > > +
> > >   event->header_size = size;
> > >   }
> > > @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle 
> > > *handle,
> > >   perf_aux_sample_output(event, handle, data);
> > >   }
> > > + if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
> > > + perf_output_put(handle, data->weight_ext);
> > > +
> > >   if (!event->attr.watermark) {
> > >   int wakeup_events = event->attr.wakeup_events;
> > 
> > This patch is broken and will expose uninitialized kernel stack.
> > 
> 
> Could we initialize the 'weight_ext' in perf_sample_data_init()?

No. Also see my other mail for why I hate this thing.


Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT

2021-01-26 Thread Liang, Kan




On 1/26/2021 9:42 AM, Peter Zijlstra wrote:

On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote:


@@ -900,6 +901,13 @@ enum perf_event_type {
 *char  data[size]; } && PERF_SAMPLE_AUX
 *  { u64   data_page_size;} && 
PERF_SAMPLE_DATA_PAGE_SIZE
 *  { u64   code_page_size;} && 
PERF_SAMPLE_CODE_PAGE_SIZE
+*  { union {
+*  u64 weight_ext;
+*  struct {
+*  u64 instr_latency:16,
+*  reserved:48;
+*  };
+*  } && PERF_SAMPLE_WEIGHT_EXT
 * };
 */
PERF_RECORD_SAMPLE  = 9,
@@ -1248,4 +1256,12 @@ struct perf_branch_entry {
reserved:40;
  };
  
+union perf_weight_ext {

+   __u64   val;
+   struct {
+   __u64   instr_latency:16,
+   reserved:48;
+   };
+};
+
  #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55d1879..9363d12 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event 
*event, u64 sample_type)
if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
size += sizeof(data->code_page_size);
  
+	if (sample_type & PERF_SAMPLE_WEIGHT_EXT)

+   size += sizeof(data->weight_ext);
+
event->header_size = size;
  }
  
@@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle *handle,

perf_aux_sample_output(event, handle, data);
}
  
+	if (sample_type & PERF_SAMPLE_WEIGHT_EXT)

+   perf_output_put(handle, data->weight_ext);
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
  


This patch is broken and will expose uninitialized kernel stack.



Could we initialize the 'weight_ext' in perf_sample_data_init()?

I understand that we prefer not to set the field in 
perf_sample_data_init() to minimize the cachelines touched.
However, the perf_sample_data_init() should be the most proper place to 
do the initialization. Also, the 'weight' is already initialized in it. 
As an extension, I think the 'weight_ext' should be initialized in it as 
well.


In the perf_prepare_sample(), I think we can only clear the unused 
fields. The [0:15] bits may still leak the data.


Thanks,
Kan


Re: [PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT

2021-01-26 Thread Peter Zijlstra
On Tue, Jan 19, 2021 at 12:38:20PM -0800, kan.li...@linux.intel.com wrote:

> @@ -900,6 +901,13 @@ enum perf_event_type {
>*char  data[size]; } && PERF_SAMPLE_AUX
>*  { u64   data_page_size;} && 
> PERF_SAMPLE_DATA_PAGE_SIZE
>*  { u64   code_page_size;} && 
> PERF_SAMPLE_CODE_PAGE_SIZE
> +  *  { union {
> +  *  u64 weight_ext;
> +  *  struct {
> +  *  u64 instr_latency:16,
> +  *  reserved:48;
> +  *  };
> +  *  } && PERF_SAMPLE_WEIGHT_EXT
>* };
>*/
>   PERF_RECORD_SAMPLE  = 9,
> @@ -1248,4 +1256,12 @@ struct perf_branch_entry {
>   reserved:40;
>  };
>  
> +union perf_weight_ext {
> + __u64   val;
> + struct {
> + __u64   instr_latency:16,
> + reserved:48;
> + };
> +};
> +
>  #endif /* _UAPI_LINUX_PERF_EVENT_H */
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 55d1879..9363d12 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event 
> *event, u64 sample_type)
>   if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
>   size += sizeof(data->code_page_size);
>  
> + if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
> + size += sizeof(data->weight_ext);
> +
>   event->header_size = size;
>  }
>  
> @@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle 
> *handle,
>   perf_aux_sample_output(event, handle, data);
>   }
>  
> + if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
> + perf_output_put(handle, data->weight_ext);
> +
>   if (!event->attr.watermark) {
>   int wakeup_events = event->attr.wakeup_events;
>  

This patch is broken and will expose uninitialized kernel stack.


[PATCH 01/12] perf/core: Add PERF_SAMPLE_WEIGHT_EXT

2021-01-19 Thread kan . liang
From: Kan Liang 

Current PERF_SAMPLE_WEIGHT sample type is very useful to expresses the
cost of an action represented by the sample. This allows the profiler
to scale the samples to be more informative to the programmer. It could
also help to locate a hotspot, e.g., when profiling by memory latencies,
the expensive load appear higher up in the histograms. But current
PERF_SAMPLE_WEIGHT sample type is solely determined by one factor. This
could be a problem, if users want two or more factors to contribute to
the weight. For example, Golden Cove core PMU can provide both the
instruction latency and the cache Latency information as factors for the
memory profiling.

Add a new sample type, PERF_SAMPLE_WEIGHT_EXT, as an extension of the
PERF_SAMPLE_WEIGHT sample type.

The first 16-bit is used as the weight value for the instruction
latency, defined by the delay measured between the dispatch of an
instruction for execution and its completion. This is quite generic and
can be extended to other kinds of architectures, as long as the hardware
provides suitable values. Other fields are reserved for future usage.

Signed-off-by: Kan Liang 
---
 include/linux/perf_event.h  |  1 +
 include/uapi/linux/perf_event.h | 18 +-
 kernel/events/core.c|  6 ++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f57..005b6b8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1030,6 +1030,7 @@ struct perf_sample_data {
u64 cgroup;
u64 data_page_size;
u64 code_page_size;
+   union perf_weight_ext   weight_ext;
 } cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e344..d0129e5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -145,8 +145,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_CGROUP  = 1U << 21,
PERF_SAMPLE_DATA_PAGE_SIZE  = 1U << 22,
PERF_SAMPLE_CODE_PAGE_SIZE  = 1U << 23,
+   PERF_SAMPLE_WEIGHT_EXT  = 1U << 24,
 
-   PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */
+   PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
 
__PERF_SAMPLE_CALLCHAIN_EARLY   = 1ULL << 63, /* non-ABI; 
internal use */
 };
@@ -900,6 +901,13 @@ enum perf_event_type {
 *char  data[size]; } && PERF_SAMPLE_AUX
 *  { u64   data_page_size;} && 
PERF_SAMPLE_DATA_PAGE_SIZE
 *  { u64   code_page_size;} && 
PERF_SAMPLE_CODE_PAGE_SIZE
+*  { union {
+*  u64 weight_ext;
+*  struct {
+*  u64 instr_latency:16,
+*  reserved:48;
+*  };
+*  } && PERF_SAMPLE_WEIGHT_EXT
 * };
 */
PERF_RECORD_SAMPLE  = 9,
@@ -1248,4 +1256,12 @@ struct perf_branch_entry {
reserved:40;
 };
 
+union perf_weight_ext {
+   __u64   val;
+   struct {
+   __u64   instr_latency:16,
+   reserved:48;
+   };
+};
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55d1879..9363d12 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1903,6 +1903,9 @@ static void __perf_event_header_size(struct perf_event 
*event, u64 sample_type)
if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
size += sizeof(data->code_page_size);
 
+   if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
+   size += sizeof(data->weight_ext);
+
event->header_size = size;
 }
 
@@ -6952,6 +6955,9 @@ void perf_output_sample(struct perf_output_handle *handle,
perf_aux_sample_output(event, handle, data);
}
 
+   if (sample_type & PERF_SAMPLE_WEIGHT_EXT)
+   perf_output_put(handle, data->weight_ext);
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
 
-- 
2.7.4