Re: [Intel-gfx] [PATCH v3 9/9] drm/i915/perf: Add support for OA media units

2023-03-02 Thread Dixit, Ashutosh
On Mon, 27 Feb 2023 18:23:29 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> @@ -1632,11 +1647,28 @@ free_noa_wait(struct i915_perf_stream *stream)
>   i915_vma_unpin_and_release(>noa_wait, 0);
>  }
>
> +/*
> + * intel_engine_lookup_user ensures that most of engine specific checks are
> + * taken care of, however, we can run into a case where the OA unit catering 
> to
> + * the engine passed by the user is disabled for some reason. In such cases,
> + * ensure oa unit corresponding to an engine is functional. If there are no
> + * engines in the group, the unit is disabled.
> + */
> +static bool oa_unit_functional(const struct intel_engine_cs *engine)
> +{
> + return engine->oa_group && engine->oa_group->num_engines;

This doesn't add anything above engine_supports_oa() below: if
engine->oa_group is true for engine X, engine->oa_group->num_engines must
at least be 1 because the oa_group must at least contain X. (Of course
oa_group->num_engines can still be 0 but engine->oa_group->num_engines must
be > 0).

We can see this in oa_init_gt where num_engines++ and oa_group assignment
is done together:

static int oa_init_gt(struct intel_gt *gt)
{
...

for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
u32 index = __oa_engine_group(engine);

engine->oa_group = NULL;
if (index < num_groups) {
g[index].num_engines++;
engine->oa_group = [index];
}
}
}

Therefore in read_properties_unlocked these checks are sufficient:

props->engine = intel_engine_lookup_user(perf->i915, class, instance);
if (!props->engine) {
return -EINVAL;
}

if (!engine_supports_oa(props->engine)) {
return -EINVAL;
}

The oa_unit_functional check is not needed.


> +}
> +
>  static bool engine_supports_oa(const struct intel_engine_cs *engine)
>  {
>   return engine->oa_group;
>  }
>
> +static bool engine_supports_oa_format(struct intel_engine_cs *engine, int 
> type)
> +{
> + return engine->oa_group && engine->oa_group->type == type;
> +}
> +

/snip/

> @@ -4186,6 +4227,17 @@ static int read_properties_unlocked(struct i915_perf 
> *perf,
>   return -EINVAL;
>   }
>
> + if (!oa_unit_functional(props->engine))
> + return -ENODEV;
> +
> + i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
> + f = >oa_formats[i];
> + if (!engine_supports_oa_format(props->engine, f->type)) {
> + DRM_DEBUG("Invalid OA format %d for class %d\n",
> +   f->type, props->engine->class);
> + return -EINVAL;
> + }
> +

Thanks.
--
Ashutosh


[Intel-gfx] [PATCH v3 9/9] drm/i915/perf: Add support for OA media units

2023-02-27 Thread Umesh Nerlige Ramappa
MTL introduces additional OA units dedicated to media use cases. Add
support for programming these OA units by passing the media engine class
and instance parameters.

UMD specific changes for GPUvis support:
https://patchwork.freedesktop.org/patch/522827/?series=114023
https://patchwork.freedesktop.org/patch/522822/?series=114023
https://patchwork.freedesktop.org/patch/522826/?series=114023
https://patchwork.freedesktop.org/patch/522828/?series=114023
https://patchwork.freedesktop.org/patch/522816/?series=114023
https://patchwork.freedesktop.org/patch/522825/?series=114023

v2: (Ashutosh)
- check for IP_VER(12, 70) instead of MTL
- remove PERF_GROUP_OAG comment in mtl_oa_base
- remove oa_buffer.group
- use engine->oa_group->type in engine_supports_oa_format
- remove fw_domains and use FORCEWAKE_ALL
- remove MPES/MPEC comment
- s/xehp/mtl/ in b counter validation function name
- remove engine_supports_oa in __oa_engine_group
- remove warn_ON from __oam_engine_group
- refactor oa_init_groups and oa_init_regs
- assign g->type correctly
- use enum oa_type definition

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_pci.c  |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 204 ---
 drivers/gpu/drm/i915/i915_perf_oa_regs.h |  78 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  30 
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 include/uapi/drm/i915_drm.h  |   4 +
 7 files changed, 296 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0393273faa09..f3cacbf41c86 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -856,6 +856,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
 #define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
+#define HAS_OAM(dev_priv) \
+   (INTEL_INFO(dev_priv)->has_oam)
 
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index a8d942b16223..621730b6551c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1028,6 +1028,7 @@ static const struct intel_device_info adl_p_info = {
.has_mslice_steering = 1, \
.has_oa_bpc_reporting = 1, \
.has_oa_slice_contrib_limits = 1, \
+   .has_oam = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9d3db1edab64..0464e38f6db5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 
@@ -326,6 +327,12 @@ static const struct i915_oa_format 
oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8]= { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8]= { 5, 256 },
+   [I915_OAM_FORMAT_MPEC8u64_B8_C8]= { 1, 192, TYPE_OAM, 
HDR_64_BIT },
+   [I915_OAM_FORMAT_MPEC8u32_B8_C8]= { 2, 128, TYPE_OAM, 
HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+   [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
 };
 
 #define SAMPLE_OA_REPORT  (1<<0)
@@ -418,11 +425,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo 
*oa_bo)
kfree(oa_bo);
 }
 
+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+   return >engine->oa_group->regs;
+}
+
 static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
 
-   return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+   return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
   GEN12_OAG_OATAILPTR_MASK;
 }
 
@@ -875,7 +888,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
i915_reg_t oaheadptr;
 
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
-   GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+   __oa_regs(stream)->oa_head_ptr :
+   GEN8_OAHEADPTR;
 
spin_lock_irqsave(>oa_buffer.ptr_lock, flags);
 
@@ -928,7 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return -EIO;
 
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
-  GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+  __oa_regs(stream)->oa_status :
+  GEN8_OASTATUS;
 
oastatus = intel_uncore_read(uncore, oastatus_reg);
 
@@ -1632,11 +1647,28 @@ free_noa_wait(struct i915_perf_stream *stream)
i915_vma_unpin_and_release(>noa_wait, 0);
 }
 
+/*
+ *