Re: [Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-09-14 Thread Umesh Nerlige Ramappa

On Tue, Sep 13, 2022 at 05:19:24PM -0700, Dixit, Ashutosh wrote:

On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote:




Hi Umesh,


XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
per clock reporting. Enable command per clock setting on enabling OA.


This should be: "Enable bytes per clock setting".


What is the reason for selecting commands per clock vs bytes per clock?
Also probably mention Bspec: 51762 in the commit message too.


It's a default configuration used to interpret the A36/A37 counters; see
Bspec: 52201.





diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index efa7eda83edd..6fc4f0d8fc5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2745,10 +2745,12 @@ static int
 gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
 {
+   struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+   u32 sqcnt1;
int ret;

intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
(period_exponent << 
GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
: 0);

+   /*
+* Initialize Super Queue Internal Cnt Register
+* Set PMON Enable in order to collect valid metrics.
+* Enable commands per clock reporting in OA for XEHPSDV onward.
+*/
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);


Also, from Bspec: 0 = units of 4 cmd and 1 = units of 128B, so it looks like
bit 29 should be set to 0 for the commands per clock setting? Or am I wrong?


I know bit 29 has to be set for DG2. I think the commit message is 
wrong. Nice catch, thanks





+
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
/*
 * Update all contexts prior writing the mux configurations as we need
 * to make sure all slices/subslices are ON before writing to NOA
@@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
i915_perf_stream *stream)
 static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
+   struct drm_i915_private *i915 = stream->perf->i915;
+   u32 sqcnt1;

/* Reset all contexts' slices/subslices configurations. */
gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
i915_perf_stream *stream)

/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   /* Reset PMON Enable to save power. */
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
 }

 static void gen7_oa_enable(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index 0ef3562ff4aa..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -134,4 +134,8 @@
 #define GDT_CHICKEN_BITS_MMIO(0x9840)
 #define   GT_NOA_ENABLE0x0080

+#define GEN12_SQCNT1   _MMIO(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC   REG_BIT(29)
+
 #endif /* __INTEL_PERF_OA_REGS__ */


Re: [Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-09-13 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
> per clock reporting. Enable command per clock setting on enabling OA.

What is the reason for selecting commands per clock vs bytes per clock?
Also probably mention Bspec: 51762 in the commit message too.

> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index efa7eda83edd..6fc4f0d8fc5a 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2745,10 +2745,12 @@ static int
>  gen12_enable_metric_set(struct i915_perf_stream *stream,
>   struct i915_active *active)
>  {
> + struct drm_i915_private *i915 = stream->perf->i915;
>   struct intel_uncore *uncore = stream->uncore;
>   struct i915_oa_config *oa_config = stream->oa_config;
>   bool periodic = stream->periodic;
>   u32 period_exponent = stream->period_exponent;
> + u32 sqcnt1;
>   int ret;
>
>   intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream 
> *stream,
>   (period_exponent << 
> GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
>   : 0);
>
> + /*
> +  * Initialize Super Queue Internal Cnt Register
> +  * Set PMON Enable in order to collect valid metrics.
> +  * Enable commands per clock reporting in OA for XEHPSDV onward.
> +  */
> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +  (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);

Also, from Bspec: 0 = units of 4 cmd and 1 = units of 128B, so it looks like
bit 29 should be set to 0 for the commands per clock setting? Or am I wrong?

> +
> + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
> +
>   /*
>* Update all contexts prior writing the mux configurations as we need
>* to make sure all slices/subslices are ON before writing to NOA
> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
> i915_perf_stream *stream)
>  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>  {
>   struct intel_uncore *uncore = stream->uncore;
> + struct drm_i915_private *i915 = stream->perf->i915;
> + u32 sqcnt1;
>
>   /* Reset all contexts' slices/subslices configurations. */
>   gen12_configure_all_contexts(stream, NULL, NULL);
> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
> i915_perf_stream *stream)
>
>   /* Make sure we disable noa to save power. */
>   intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> +
> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +  (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
> +
> + /* Reset PMON Enable to save power. */
> + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
>  }
>
>  static void gen7_oa_enable(struct i915_perf_stream *stream)
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
> b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index 0ef3562ff4aa..381d94101610 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -134,4 +134,8 @@
>  #define GDT_CHICKEN_BITS_MMIO(0x9840)
>  #define   GT_NOA_ENABLE  0x0080
>
> +#define GEN12_SQCNT1 _MMIO(0x8718)
> +#define   GEN12_SQCNT1_PMON_ENABLE   REG_BIT(30)
> +#define   GEN12_SQCNT1_OABPC REG_BIT(29)
> +
>  #endif /* __INTEL_PERF_OA_REGS__ */


Re: [Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-09-06 Thread Lionel Landwerlin

On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:

XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
per clock reporting. Enable command per clock setting on enabling OA.

Signed-off-by: Umesh Nerlige Ramappa 

Acked-by: Lionel Landwerlin 

---
  drivers/gpu/drm/i915/i915_drv.h  |  3 +++
  drivers/gpu/drm/i915/i915_pci.c  |  1 +
  drivers/gpu/drm/i915/i915_perf.c | 20 
  drivers/gpu/drm/i915/i915_perf_oa_regs.h |  4 
  drivers/gpu/drm/i915/intel_device_info.h |  1 +
  5 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b4733c5a01da..b2e8a44bd976 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
  #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
  #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
  
+#define HAS_OA_BPC_REPORTING(dev_priv) \

+   (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+
  /*
   * Set this flag, when platform requires 64K GTT page sizes or larger for
   * device local memory access.
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index d8446bb25d5e..bd0b8502b91e 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = {
.has_logical_ring_contexts = 1, \
.has_logical_ring_elsq = 1, \
.has_mslice_steering = 1, \
+   .has_oa_bpc_reporting = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index efa7eda83edd..6fc4f0d8fc5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2745,10 +2745,12 @@ static int
  gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
  {
+   struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+   u32 sqcnt1;
int ret;
  
  	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,

@@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
(period_exponent << 
GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
: 0);
  
+ 	/*

+* Initialize Super Queue Internal Cnt Register
+* Set PMON Enable in order to collect valid metrics.
+* Enable commands per clock reporting in OA for XEHPSDV onward.
+*/
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
/*
 * Update all contexts prior writing the mux configurations as we need
 * to make sure all slices/subslices are ON before writing to NOA
@@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
i915_perf_stream *stream)
  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
  {
struct intel_uncore *uncore = stream->uncore;
+   struct drm_i915_private *i915 = stream->perf->i915;
+   u32 sqcnt1;
  
  	/* Reset all contexts' slices/subslices configurations. */

gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
i915_perf_stream *stream)
  
  	/* Make sure we disable noa to save power. */

intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   /* Reset PMON Enable to save power. */
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
  }
  
  static void gen7_oa_enable(struct i915_perf_stream *stream)

diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index 0ef3562ff4aa..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -134,4 +134,8 @@
  #define GDT_CHICKEN_BITS_MMIO(0x9840)
  #define   GT_NOA_ENABLE   0x0080
  
+#define GEN12_SQCNT1_MMIO(0x8718)

+#define   GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC   REG_BIT(29)
+
  #endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 23bf230aa104..fc2a0660426e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -163,6 +163,7 @@ enum intel_ppgtt_type {
func(has_logical_ring_elsq); \
 

[Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-08-23 Thread Umesh Nerlige Ramappa
XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
per clock reporting. Enable command per clock setting on enabling OA.

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_drv.h  |  3 +++
 drivers/gpu/drm/i915/i915_pci.c  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 20 
 drivers/gpu/drm/i915/i915_perf_oa_regs.h |  4 
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 5 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b4733c5a01da..b2e8a44bd976 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
+#define HAS_OA_BPC_REPORTING(dev_priv) \
+   (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
  * device local memory access.
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index d8446bb25d5e..bd0b8502b91e 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = {
.has_logical_ring_contexts = 1, \
.has_logical_ring_elsq = 1, \
.has_mslice_steering = 1, \
+   .has_oa_bpc_reporting = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index efa7eda83edd..6fc4f0d8fc5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2745,10 +2745,12 @@ static int
 gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
 {
+   struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+   u32 sqcnt1;
int ret;
 
intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
(period_exponent << 
GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
: 0);
 
+   /*
+* Initialize Super Queue Internal Cnt Register
+* Set PMON Enable in order to collect valid metrics.
+* Enable commands per clock reporting in OA for XEHPSDV onward.
+*/
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
/*
 * Update all contexts prior writing the mux configurations as we need
 * to make sure all slices/subslices are ON before writing to NOA
@@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
i915_perf_stream *stream)
 static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
+   struct drm_i915_private *i915 = stream->perf->i915;
+   u32 sqcnt1;
 
/* Reset all contexts' slices/subslices configurations. */
gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
i915_perf_stream *stream)
 
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   /* Reset PMON Enable to save power. */
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
 }
 
 static void gen7_oa_enable(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index 0ef3562ff4aa..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -134,4 +134,8 @@
 #define GDT_CHICKEN_BITS_MMIO(0x9840)
 #define   GT_NOA_ENABLE0x0080
 
+#define GEN12_SQCNT1   _MMIO(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC   REG_BIT(29)
+
 #endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 23bf230aa104..fc2a0660426e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -163,6 +163,7 @@ enum intel_ppgtt_type {
func(has_logical_ring_elsq); \
func(has_media_ratio_mode); \
func(has_mslice_steering); \
+  

[Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-08-22 Thread Umesh Nerlige Ramappa
XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
per clock reporting. Enable command per clock setting on enabling OA.

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_drv.h  |  3 +++
 drivers/gpu/drm/i915/i915_pci.c  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 20 
 drivers/gpu/drm/i915/i915_perf_oa_regs.h |  4 
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 5 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 086bbe8945d6..b1a13cfd8c09 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
+#define HAS_OA_BPC_REPORTING(dev_priv) \
+   (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
  * device local memory access.
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 9fd788e147a3..3427042beab3 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = {
.has_logical_ring_contexts = 1, \
.has_logical_ring_elsq = 1, \
.has_mslice_steering = 1, \
+   .has_oa_bpc_reporting = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index efa7eda83edd..6fc4f0d8fc5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2745,10 +2745,12 @@ static int
 gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
 {
+   struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+   u32 sqcnt1;
int ret;
 
intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
(period_exponent << 
GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
: 0);
 
+   /*
+* Initialize Super Queue Internal Cnt Register
+* Set PMON Enable in order to collect valid metrics.
+* Enable commands per clock reporting in OA for XEHPSDV onward.
+*/
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
/*
 * Update all contexts prior writing the mux configurations as we need
 * to make sure all slices/subslices are ON before writing to NOA
@@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
i915_perf_stream *stream)
 static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
+   struct drm_i915_private *i915 = stream->perf->i915;
+   u32 sqcnt1;
 
/* Reset all contexts' slices/subslices configurations. */
gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
i915_perf_stream *stream)
 
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+   sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+(HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+   /* Reset PMON Enable to save power. */
+   intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
 }
 
 static void gen7_oa_enable(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index 0ef3562ff4aa..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -134,4 +134,8 @@
 #define GDT_CHICKEN_BITS_MMIO(0x9840)
 #define   GT_NOA_ENABLE0x0080
 
+#define GEN12_SQCNT1   _MMIO(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC   REG_BIT(29)
+
 #endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 23bf230aa104..fc2a0660426e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -163,6 +163,7 @@ enum intel_ppgtt_type {
func(has_logical_ring_elsq); \
func(has_media_ratio_mode); \
func(has_mslice_steering); \
+