[Intel-gfx] [PATCH] drm/i915/guc: Cancel GuC engine busyness worker synchronously

2022-07-26 Thread Nerlige Ramappa, Umesh
The worker is canceled in gt_park path, but earlier it was assumed that
gt_park path cannot sleep and the cancel is asynchronous. This caused a
race with suspend flow where the worker runs after suspend and causes an
unclaimed register access warning. Cancel the worker synchronously since
the gt_park is indeed allowed to sleep.

Signed-off-by: Umesh Nerlige Ramappa 
Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 76916aed897a..0b7a5ecb640a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1438,7 +1438,12 @@ void intel_guc_busyness_park(struct intel_gt *gt)
if (!guc_submission_initialized(guc))
return;
 
-   cancel_delayed_work(&guc->timestamp.work);
+   /*
+* There is a race with suspend flow where the worker runs after suspend
+* and causes an unclaimed register access warning. Cancel the worker
+* synchronously here.
+*/
+   cancel_delayed_work_sync(&guc->timestamp.work);
 
/*
 * Before parking, we should sample engine busyness stats if we need to.
-- 
2.36.1



[Intel-gfx] [PATCH 1/2] i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call

2022-07-07 Thread Nerlige Ramappa, Umesh
DRM_DEBUG is not the right debug call to use in i915 OA, replace it with
driver specific drm_dbg() call (Matt).

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 151 ---
 1 file changed, 100 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1577ab6754db..b3beb89884e0 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -885,8 +885,9 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
if (ret)
return ret;
 
-   DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- stream->period_exponent);
+   drm_dbg(&stream->perf->i915->drm,
+   "OA buffer overflow (exponent = %d): force restart\n",
+   stream->period_exponent);
 
stream->perf->ops.oa_disable(stream);
stream->perf->ops.oa_enable(stream);
@@ -1108,8 +1109,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
if (ret)
return ret;
 
-   DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- stream->period_exponent);
+   drm_dbg(&stream->perf->i915->drm,
+   "OA buffer overflow (exponent = %d): force restart\n",
+   stream->period_exponent);
 
stream->perf->ops.oa_disable(stream);
stream->perf->ops.oa_enable(stream);
@@ -2863,7 +2865,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
int ret;
 
if (!props->engine) {
-   DRM_DEBUG("OA engine not specified\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA engine not specified\n");
return -EINVAL;
}
 
@@ -2873,18 +2876,21 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 * IDs
 */
if (!perf->metrics_kobj) {
-   DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA metrics weren't advertised via sysfs\n");
return -EINVAL;
}
 
if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
(GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
-   DRM_DEBUG("Only OA report sampling supported\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Only OA report sampling supported\n");
return -EINVAL;
}
 
if (!perf->ops.enable_metric_set) {
-   DRM_DEBUG("OA unit not supported\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA unit not supported\n");
return -ENODEV;
}
 
@@ -2894,12 +2900,14 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 * we currently only allow exclusive access
 */
if (perf->exclusive_stream) {
-   DRM_DEBUG("OA unit already in use\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA unit already in use\n");
return -EBUSY;
}
 
if (!props->oa_format) {
-   DRM_DEBUG("OA report format not specified\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA report format not specified\n");
return -EINVAL;
}
 
@@ -2929,20 +2937,23 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
if (stream->ctx) {
ret = oa_get_render_ctx_id(stream);
if (ret) {
-   DRM_DEBUG("Invalid context id to filter with\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Invalid context id to filter with\n");
return ret;
}
}
 
ret = alloc_noa_wait(stream);
if (ret) {
-   DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Unable to allocate NOA wait batch buffer\n");
goto err_noa_wait_alloc;
}
 
stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
if (!stream->oa_config) {
-   DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+   drm_dbg(&stream->perf->i915->drm,
+   "Invalid OA config id=%i\n", props->metrics_set);
ret = -EINVAL;
goto err_config;
}
@@ -2973,11 +2984,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
-   DRM_DEBUG("Unable to enable metric set\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Unable to enable metric set\n");
goto err_enable;
}
 
-  

[Intel-gfx] [PATCH 2/2] i915/perf: Disable OA sseu config param for gfx12.50+

2022-07-07 Thread Nerlige Ramappa, Umesh
The global sseu config is applicable only to gen11 platforms where
concurrent media, render and OA use cases may cause some subslices to be
turned off and hence lose NOA configuration. Ideally we want to return
ENODEV for non-gen11 platforms, however, this has shipped with gfx12, so
disable only for gfx12.50+.

v2: gfx12 is already shipped with this, disable for gfx12.50+ (Lionel)

v3: (Matt)
- Update commit message and replace "12.5" with "12.50"
- Replace DRM_DEBUG() with driver specific drm_dbg()

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b3beb89884e0..f3c23fe9ad9c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3731,6 +3731,13 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
struct drm_i915_gem_context_param_sseu user_sseu;
 
+   if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
drm_dbg(&perf->i915->drm,
+   "SSEU config not supported on gfx %x\n",
+   GRAPHICS_VER_FULL(perf->i915));
+   return -ENODEV;
+   }
+
if (copy_from_user(&user_sseu,
   u64_to_user_ptr(value),
   sizeof(user_sseu))) {
-- 
2.35.3



[Intel-gfx] [PATCH 1/2] i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call

2022-07-07 Thread Nerlige Ramappa, Umesh
DRM_DEBUG is not the right debug call to use in i915 OA, replace it with
driver specific drm_dbg() call (Matt).

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 151 ---
 1 file changed, 100 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1577ab6754db..b3beb89884e0 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -885,8 +885,9 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
if (ret)
return ret;
 
-   DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- stream->period_exponent);
+   drm_dbg(&stream->perf->i915->drm,
+   "OA buffer overflow (exponent = %d): force restart\n",
+   stream->period_exponent);
 
stream->perf->ops.oa_disable(stream);
stream->perf->ops.oa_enable(stream);
@@ -1108,8 +1109,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
if (ret)
return ret;
 
-   DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
- stream->period_exponent);
+   drm_dbg(&stream->perf->i915->drm,
+   "OA buffer overflow (exponent = %d): force restart\n",
+   stream->period_exponent);
 
stream->perf->ops.oa_disable(stream);
stream->perf->ops.oa_enable(stream);
@@ -2863,7 +2865,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
int ret;
 
if (!props->engine) {
-   DRM_DEBUG("OA engine not specified\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA engine not specified\n");
return -EINVAL;
}
 
@@ -2873,18 +2876,21 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 * IDs
 */
if (!perf->metrics_kobj) {
-   DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA metrics weren't advertised via sysfs\n");
return -EINVAL;
}
 
if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
(GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
-   DRM_DEBUG("Only OA report sampling supported\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Only OA report sampling supported\n");
return -EINVAL;
}
 
if (!perf->ops.enable_metric_set) {
-   DRM_DEBUG("OA unit not supported\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA unit not supported\n");
return -ENODEV;
}
 
@@ -2894,12 +2900,14 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 * we currently only allow exclusive access
 */
if (perf->exclusive_stream) {
-   DRM_DEBUG("OA unit already in use\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA unit already in use\n");
return -EBUSY;
}
 
if (!props->oa_format) {
-   DRM_DEBUG("OA report format not specified\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "OA report format not specified\n");
return -EINVAL;
}
 
@@ -2929,20 +2937,23 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
if (stream->ctx) {
ret = oa_get_render_ctx_id(stream);
if (ret) {
-   DRM_DEBUG("Invalid context id to filter with\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Invalid context id to filter with\n");
return ret;
}
}
 
ret = alloc_noa_wait(stream);
if (ret) {
-   DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Unable to allocate NOA wait batch buffer\n");
goto err_noa_wait_alloc;
}
 
stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
if (!stream->oa_config) {
-   DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+   drm_dbg(&stream->perf->i915->drm,
+   "Invalid OA config id=%i\n", props->metrics_set);
ret = -EINVAL;
goto err_config;
}
@@ -2973,11 +2984,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
-   DRM_DEBUG("Unable to enable metric set\n");
+   drm_dbg(&stream->perf->i915->drm,
+   "Unable to enable metric set\n");
goto err_enable;
}
 
-  

[Intel-gfx] [PATCH 2/2] i915/perf: Disable OA sseu config param for gfx12.50+

2022-07-07 Thread Nerlige Ramappa, Umesh
The global sseu config is applicable only to gen11 platforms where
concurrent media, render and OA use cases may cause some subslices to be
turned off and hence lose NOA configuration. Ideally we want to return
ENODEV for non-gen11 platforms, however, this has shipped with gfx12, so
disable only for gfx12.50+.

v2: gfx12 is already shipped with this, disable for gfx12.50+ (Lionel)

v3: (Matt)
- Update commit message and replace "12.5" with "12.50"
- Replace DRM_DEBUG() with driver specific drm_dbg()

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b3beb89884e0..2c7763bf59a8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3731,6 +3731,12 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
struct drm_i915_gem_context_param_sseu user_sseu;
 
+   if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
+   DRM_DEBUG("SSEU config not supported on gfx %x\n",
+ GRAPHICS_VER_FULL(perf->i915));
+   return -ENODEV;
+   }
+
if (copy_from_user(&user_sseu,
   u64_to_user_ptr(value),
   sizeof(user_sseu))) {
-- 
2.35.3



[Intel-gfx] [PATCH] i915/perf: Disable OA sseu config param for gfx12.5+

2022-07-07 Thread Nerlige Ramappa, Umesh
The global sseu config is applicable only to gen11 platforms where
concurrent media, render and OA use cases may cause some subslices to be
turned off and hence lose NOA configuration. Ideally we want to return
ENODEV for non-gen11 platforms, however, this has shipped with gfx12, so
disable only for gfx12.5+.

v2: gfx12 is already shipped with this, disable for gfx12.5+ (Lionel)

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1577ab6754db..0ba98f73f217 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3706,6 +3706,12 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
struct drm_i915_gem_context_param_sseu user_sseu;
 
+   if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
+   DRM_DEBUG("SSEU config not supported on gfx %x\n",
+ GRAPHICS_VER_FULL(perf->i915));
+   return -ENODEV;
+   }
+
if (copy_from_user(&user_sseu,
   u64_to_user_ptr(value),
   sizeof(user_sseu))) {
-- 
2.35.3



[Intel-gfx] [PATCH] i915/perf: Disable OA sseu config param for non-gen11 platforms

2022-07-07 Thread Nerlige Ramappa, Umesh
The global sseu config is applicable only to gen11 platforms where
concurrent media, render and OA use cases may cause some subslices to be
turned off and hence lose NOA configuration. Return ENODEV for non-gen11
platforms.

v2: gfx12 is already shipped with this, disable for gfx12.5+ (Lionel)

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1577ab6754db..0ba98f73f217 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3706,6 +3706,12 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
struct drm_i915_gem_context_param_sseu user_sseu;
 
+   if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
+   DRM_DEBUG("SSEU config not supported on gfx %x\n",
+ GRAPHICS_VER_FULL(perf->i915));
+   return -ENODEV;
+   }
+
if (copy_from_user(&user_sseu,
   u64_to_user_ptr(value),
   sizeof(user_sseu))) {
-- 
2.35.3



[Intel-gfx] [PATCH] i915/perf: Disable OA sseu config param for non-gen11 platforms

2022-07-06 Thread Nerlige Ramappa, Umesh
The global sseu config is applicable only to gen11 platforms where
concurrent media, render and OA use cases may cause some subslices to be
turned off and hence lose NOA configuration. Return ENODEV for non-gen11
platforms.

Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1577ab6754db..512c163fdbeb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3706,6 +3706,12 @@ static int read_properties_unlocked(struct i915_perf 
*perf,
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
struct drm_i915_gem_context_param_sseu user_sseu;
 
+   if (GRAPHICS_VER(perf->i915) != 11) {
+   DRM_DEBUG("Global SSEU config not supported on gen%d\n",
+ GRAPHICS_VER(perf->i915));
+   return -ENODEV;
+   }
+
if (copy_from_user(&user_sseu,
   u64_to_user_ptr(value),
   sizeof(user_sseu))) {
-- 
2.35.3



[Intel-gfx] [PATCH] drm/i915/reset: Add additional steps for Wa_22011802037 for execlist backend

2022-06-21 Thread Nerlige Ramappa, Umesh
From: Umesh Nerlige Ramappa 

For execlists backend, current implementation of Wa_22011802037 is to
stop the CS before doing a reset of the engine. This WA was further
extended to wait for any pending MI FORCE WAKEUPs before issuing a
reset. Add the extended steps in the execlist path of reset.

In addition, extend the WA to gen11.

v2: (Tvrtko)
- Clarify comments, commit message, fix typos
- Use IS_GRAPHICS_VER for gen 11/12 checks

v3: (Daniele)
- Drop changes to intel_ring_submission since WA does not apply to it
- Log an error if MSG IDLE is not defined for an engine

Signed-off-by: Umesh Nerlige Ramappa 
Fixes: f6aa0d713c88 ("drm/i915: Add Wa_22011802037 force cs halt")
Acked-by: Tvrtko Ursulin 
Reviewed-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_engine.h|  2 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 88 ++-
 .../drm/i915/gt/intel_execlists_submission.c  |  7 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 81 ++---
 5 files changed, 103 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs 
*engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 136cc44c3deb..283870c65991 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1376,10 +1376,10 @@ static int __intel_engine_stop_cs(struct 
intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
/*
-* Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+* Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 * stopped, set ring stop bit and prefetch disable bit to halt CS
 */
-   if (GRAPHICS_VER(engine->i915) == 12)
+   if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
  
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
 
@@ -1402,6 +1402,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
 
ENGINE_TRACE(engine, "\n");
+   /*
+* TODO: Find out why occasionally stopping the CS times out. Seen
+* especially with gem_eio tests.
+*
+* Occasionally trying to stop the cs times out, but does not adversely
+* affect functionality. The timeout is set as a config parameter that
+* defaults to 100ms. In most cases the follow up operation is to wait
+* for pending MI_FORCE_WAKES. The assumption is that this timeout is
+* sufficient for any pending MI_FORCEWAKEs to complete. Once root
+* caused, the caller must check and handle the return from this
+* function.
+*/
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
 "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
@@ -1428,6 +1440,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+   static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+   [RCS0] = MSG_IDLE_CS,
+   [BCS0] = MSG_IDLE_BCS,
+   [VCS0] = MSG_IDLE_VCS0,
+   [VCS1] = MSG_IDLE_VCS1,
+   [VCS2] = MSG_IDLE_VCS2,
+   [VCS3] = MSG_IDLE_VCS3,
+   [VCS4] = MSG_IDLE_VCS4,
+   [VCS5] = MSG_IDLE_VCS5,
+   [VCS6] = MSG_IDLE_VCS6,
+   [VCS7] = MSG_IDLE_VCS7,
+   [VECS0] = MSG_IDLE_VECS0,
+   [VECS1] = MSG_IDLE_VECS1,
+   [VECS2] = MSG_IDLE_VECS2,
+   [VECS3] = MSG_IDLE_VECS3,
+   [CCS0] = MSG_IDLE_CS,
+   [CCS1] = MSG_IDLE_CS,
+   [CCS2] = MSG_IDLE_CS,
+   [CCS3] = MSG_IDLE_CS,
+   };
+   u32 val;
+
+   if (!_reg[engine->id].reg) {
+   drm_err(&engine->i915->drm,
+   "MSG IDLE undefined for engine id %u\n", engine->id);
+   return 0;
+   }
+
+   val = intel_uncore_read(engine->uncore, _reg[engine->id]);

[Intel-gfx] [PATCH] i915/pmu: Wire GuC backend to per-client busyness

2022-06-16 Thread Nerlige Ramappa, Umesh
From: John Harrison 

GuC provides engine_id and last_switch_in ticks for an active context in
the pphwsp. The context image provides a 32 bit total ticks which is the
accumulated by the context (a.k.a. context[CTX_TIMESTAMP]). This
information is used to calculate the context busyness as follows:

If the engine_id is valid, then busyness is the sum of accumulated total
ticks and active ticks. Active ticks is calculated with current gt time
as reference.

If engine_id is invalid, busyness is equal to accumulated total ticks.

Since KMD (CPU) retrieves busyness data from 2 sources - GPU and GuC, a
potential race was highlighted in an earlier review that can lead to
double accounting of busyness. While the solution to this is a wip,
busyness is still usable for platforms running GuC submission.

v2: (Tvrtko)
- Use COPS_RUNTIME_ACTIVE_TOTAL
- Add code comment for the race
- Undo local variables initializations

v3:
- Add support for virtual engines based on
  https://patchwork.freedesktop.org/series/105227/

Signed-off-by: John Harrison 
Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 12 +++-
 drivers/gpu/drm/i915/gt/intel_context.h   |  6 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  5 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 65 ++-
 drivers/gpu/drm/i915/i915_drm_client.c|  6 +-
 6 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 4070cb5711d8..4a84146710e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -576,16 +576,24 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
child->parallel.parent = parent;
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
u64 total, active;
 
+   if (ce->ops->update_stats)
+   ce->ops->update_stats(ce);
+
total = ce->stats.runtime.total;
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
total *= ce->engine->gt->clock_period_ns;
 
active = READ_ONCE(ce->stats.active);
-   if (active)
+   /*
+* When COPS_RUNTIME_ACTIVE_TOTAL is set for ce->cops, the backend
+* already provides the total active time of the context, so skip this
+* calculation when this flag is set.
+*/
+   if (active && !(ce->ops->flags & COPS_RUNTIME_ACTIVE_TOTAL))
active = intel_context_clock() - active;
 
return total + active;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b7d3214d2cdd..5fc7c19ab29b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -56,7 +56,7 @@ static inline bool intel_context_is_parent(struct 
intel_context *ce)
return !!ce->parallel.number_children;
 }
 
-static inline bool intel_context_is_pinned(struct intel_context *ce);
+static inline bool intel_context_is_pinned(const struct intel_context *ce);
 
 static inline struct intel_context *
 intel_context_to_parent(struct intel_context *ce)
@@ -116,7 +116,7 @@ static inline int intel_context_lock_pinned(struct 
intel_context *ce)
  * Returns: true if the context is currently pinned for use by the GPU.
  */
 static inline bool
-intel_context_is_pinned(struct intel_context *ce)
+intel_context_is_pinned(const struct intel_context *ce)
 {
return atomic_read(&ce->pin_count);
 }
@@ -351,7 +351,7 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
 u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
 
 static inline u64 intel_context_clock(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 09f82545789f..797bb4242c18 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -38,6 +38,9 @@ struct intel_context_ops {
 #define COPS_RUNTIME_CYCLES_BIT 1
 #define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)
 
+#define COPS_RUNTIME_ACTIVE_TOTAL_BIT 2
+#define COPS_RUNTIME_ACTIVE_TOTAL BIT(COPS_RUNTIME_ACTIVE_TOTAL_BIT)
+
int (*alloc)(struct intel_context *ce);
 
void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -55,6 +58,8 @@ struct intel_context_ops {
 
void (*sched_disable)(struct intel_context *ce);
 
+   void (*update_stats)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 
@@ -146,6 +151,7 @@ struct intel_context {
struct 

[Intel-gfx] [PATCH] i915/pmu: Wire GuC backend to per-client busyness

2022-06-16 Thread Nerlige Ramappa, Umesh
From: John Harrison 

GuC provides engine_id and last_switch_in ticks for an active context in
the pphwsp. The context image provides a 32 bit total ticks which is the
accumulated by the context (a.k.a. context[CTX_TIMESTAMP]). This
information is used to calculate the context busyness as follows:

If the engine_id is valid, then busyness is the sum of accumulated total
ticks and active ticks. Active ticks is calculated with current gt time
as reference.

If engine_id is invalid, busyness is equal to accumulated total ticks.

Since KMD (CPU) retrieves busyness data from 2 sources - GPU and GuC, a
potential race was highlighted in an earlier review that can lead to
double accounting of busyness. While the solution to this is a wip,
busyness is still usable for platforms running GuC submission.

v2: (Tvrtko)
- Use COPS_RUNTIME_ACTIVE_TOTAL
- Add code comment for the race
- Undo local variables initializations

Signed-off-by: John Harrison 
Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 12 +++-
 drivers/gpu/drm/i915/gt/intel_context.h   |  6 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  5 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 63 ++-
 drivers/gpu/drm/i915/i915_drm_client.c|  6 +-
 6 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 4070cb5711d8..4a84146710e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -576,16 +576,24 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
child->parallel.parent = parent;
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
u64 total, active;
 
+   if (ce->ops->update_stats)
+   ce->ops->update_stats(ce);
+
total = ce->stats.runtime.total;
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
total *= ce->engine->gt->clock_period_ns;
 
active = READ_ONCE(ce->stats.active);
-   if (active)
+   /*
+* When COPS_RUNTIME_ACTIVE_TOTAL is set for ce->cops, the backend
+* already provides the total active time of the context, so skip this
+* calculation when this flag is set.
+*/
+   if (active && !(ce->ops->flags & COPS_RUNTIME_ACTIVE_TOTAL))
active = intel_context_clock() - active;
 
return total + active;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b7d3214d2cdd..5fc7c19ab29b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -56,7 +56,7 @@ static inline bool intel_context_is_parent(struct 
intel_context *ce)
return !!ce->parallel.number_children;
 }
 
-static inline bool intel_context_is_pinned(struct intel_context *ce);
+static inline bool intel_context_is_pinned(const struct intel_context *ce);
 
 static inline struct intel_context *
 intel_context_to_parent(struct intel_context *ce)
@@ -116,7 +116,7 @@ static inline int intel_context_lock_pinned(struct 
intel_context *ce)
  * Returns: true if the context is currently pinned for use by the GPU.
  */
 static inline bool
-intel_context_is_pinned(struct intel_context *ce)
+intel_context_is_pinned(const struct intel_context *ce)
 {
return atomic_read(&ce->pin_count);
 }
@@ -351,7 +351,7 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
 u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
 
 static inline u64 intel_context_clock(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 09f82545789f..797bb4242c18 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -38,6 +38,9 @@ struct intel_context_ops {
 #define COPS_RUNTIME_CYCLES_BIT 1
 #define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)
 
+#define COPS_RUNTIME_ACTIVE_TOTAL_BIT 2
+#define COPS_RUNTIME_ACTIVE_TOTAL BIT(COPS_RUNTIME_ACTIVE_TOTAL_BIT)
+
int (*alloc)(struct intel_context *ce);
 
void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -55,6 +58,8 @@ struct intel_context_ops {
 
void (*sched_disable)(struct intel_context *ce);
 
+   void (*update_stats)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 
@@ -146,6 +151,7 @@ struct intel_context {
struct ewma_runtime avg;
u64 total;
u32 last;
+ 

[Intel-gfx] [PATCH] i915/pmu: Wire GuC backend to per-client busyness

2022-06-13 Thread Nerlige Ramappa, Umesh
From: John Harrison 

GuC provides engine_id and last_switch_in ticks for an active context in the
pphwsp. The context image provides a 32 bit total ticks which is the accumulated
by the context (a.k.a. context[CTX_TIMESTAMP]). This information is used to
calculate the context busyness as follows:

If the engine_id is valid, then busyness is the sum of accumulated total ticks
and active ticks. Active ticks is calculated with current gt time as reference.

If engine_id is invalid, busyness is equal to accumulated total ticks.

Since KMD (CPU) retrieves busyness data from 2 sources - GPU and GuC, a
potential race was highlighted in an earlier review that can lead to double
accounting of busyness. While the solution to this is a wip, busyness is still
usable for platforms running GuC submission.

Signed-off-by: John Harrison 
Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 11 +++-
 drivers/gpu/drm/i915/gt/intel_context.h   |  6 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  5 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 55 ++-
 drivers/gpu/drm/i915/i915_drm_client.c|  6 +-
 6 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 4070cb5711d8..a49f313db911 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -576,16 +576,23 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
child->parallel.parent = parent;
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
u64 total, active;
 
+   if (ce->ops->update_stats)
+   ce->ops->update_stats(ce);
+
total = ce->stats.runtime.total;
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
total *= ce->engine->gt->clock_period_ns;
 
active = READ_ONCE(ce->stats.active);
-   if (active)
+   /*
+* GuC backend returns the actual time the context was active, so skip
+* the calculation here for GuC.
+*/
+   if (active && !intel_engine_uses_guc(ce->engine))
active = intel_context_clock() - active;
 
return total + active;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b7d3214d2cdd..5fc7c19ab29b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -56,7 +56,7 @@ static inline bool intel_context_is_parent(struct 
intel_context *ce)
return !!ce->parallel.number_children;
 }
 
-static inline bool intel_context_is_pinned(struct intel_context *ce);
+static inline bool intel_context_is_pinned(const struct intel_context *ce);
 
 static inline struct intel_context *
 intel_context_to_parent(struct intel_context *ce)
@@ -116,7 +116,7 @@ static inline int intel_context_lock_pinned(struct 
intel_context *ce)
  * Returns: true if the context is currently pinned for use by the GPU.
  */
 static inline bool
-intel_context_is_pinned(struct intel_context *ce)
+intel_context_is_pinned(const struct intel_context *ce)
 {
return atomic_read(>pin_count);
 }
@@ -351,7 +351,7 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, >flags);
 }
 
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
 u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
 
 static inline u64 intel_context_clock(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 09f82545789f..0a3290c99a31 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,6 +55,8 @@ struct intel_context_ops {
 
void (*sched_disable)(struct intel_context *ce);
 
+   void (*update_stats)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 
@@ -146,6 +148,7 @@ struct intel_context {
struct ewma_runtime avg;
u64 total;
u32 last;
+   u64 start_gt_clk;
I915_SELFTEST_DECLARE(u32 num_underflow);
I915_SELFTEST_DECLARE(u32 max_underflow);
} runtime;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b3c9a9327f76..6231ad03e4eb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -196,6 +196,11 @@ static inline u8 guc_class_to_engine_class(u8 guc_class)
return guc_class_engine_class_map[guc_class];
 }
 
+/* Per context 

[Intel-gfx] [PATCH] drm/i915/reset: Add additional steps for Wa_22011802037 for execlist backend

2022-06-09 Thread Nerlige Ramappa, Umesh
From: Umesh Nerlige Ramappa 

For execlists backend, current implementation of Wa_22011802037 is to
stop the CS before doing a reset of the engine. This WA was further
extended to wait for any pending MI FORCE WAKEUPs before issuing a
reset. Add the extended steps in the execlist path of reset.

In addition, extend the WA to gen11.

v2: (Tvrtko)
- Clarify comments, commit message, fix typos
- Use IS_GRAPHICS_VER for gen 11/12 checks

v3: (Daniele)
- Drop changes to intel_ring_submission since WA does not apply to it
- Log an error if MSG IDLE is not defined for an engine

Signed-off-by: Umesh Nerlige Ramappa 
Fixes: f6aa0d713c88 ("drm/i915: Add Wa_22011802037 force cs halt")
Acked-by: Tvrtko Ursulin 
Reviewed-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_engine.h|  2 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 88 ++-
 .../drm/i915/gt/intel_execlists_submission.c  |  7 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 81 ++---
 5 files changed, 103 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs 
*engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f0acf8518a51..b3dc32fe6c51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1375,10 +1375,10 @@ static int __intel_engine_stop_cs(struct 
intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
/*
-* Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+* Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 * stopped, set ring stop bit and prefetch disable bit to halt CS
 */
-   if (GRAPHICS_VER(engine->i915) == 12)
+   if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
  
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
 
@@ -1401,6 +1401,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
 
ENGINE_TRACE(engine, "\n");
+   /*
+* TODO: Find out why occasionally stopping the CS times out. Seen
+* especially with gem_eio tests.
+*
+* Occasionally trying to stop the cs times out, but does not adversely
+* affect functionality. The timeout is set as a config parameter that
+* defaults to 100ms. In most cases the follow up operation is to wait
+* for pending MI_FORCE_WAKES. The assumption is that this timeout is
+* sufficient for any pending MI_FORCEWAKEs to complete. Once root
+* caused, the caller must check and handle the return from this
+* function.
+*/
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
 "timed out on STOP_RING -> IDLE; HEAD:%04x, 
TAIL:%04x\n",
@@ -1427,6 +1439,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+   static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+   [RCS0] = MSG_IDLE_CS,
+   [BCS0] = MSG_IDLE_BCS,
+   [VCS0] = MSG_IDLE_VCS0,
+   [VCS1] = MSG_IDLE_VCS1,
+   [VCS2] = MSG_IDLE_VCS2,
+   [VCS3] = MSG_IDLE_VCS3,
+   [VCS4] = MSG_IDLE_VCS4,
+   [VCS5] = MSG_IDLE_VCS5,
+   [VCS6] = MSG_IDLE_VCS6,
+   [VCS7] = MSG_IDLE_VCS7,
+   [VECS0] = MSG_IDLE_VECS0,
+   [VECS1] = MSG_IDLE_VECS1,
+   [VECS2] = MSG_IDLE_VECS2,
+   [VECS3] = MSG_IDLE_VECS3,
+   [CCS0] = MSG_IDLE_CS,
+   [CCS1] = MSG_IDLE_CS,
+   [CCS2] = MSG_IDLE_CS,
+   [CCS3] = MSG_IDLE_CS,
+   };
+   u32 val;
+
+   if (!_reg[engine->id].reg) {
+   drm_err(>i915->drm,
+   "MSG IDLE undefined for engine id %u\n", engine->id);
+   return 0;
+   }
+
+   val = intel_uncore_read(engine->uncore, _reg[engine->id]);

[Intel-gfx] [PATCH] drm/i915/reset: Add additional steps for Wa_22011802037 for execlist backend

2022-06-09 Thread Nerlige Ramappa, Umesh
From: Umesh Nerlige Ramappa 

In addition, extend the WA to gen11.

v2: (Tvrtko)
- Clarify comments, commit message, fix typos
- Use IS_GRAPHICS_VER for gen 11/12 checks

v3: (Daniele)
- Drop changes to intel_ring_submission since WA does not apply to it
- Log an error if MSG IDLE is not defined for an engine

Signed-off-by: Umesh Nerlige Ramappa 
Fixes: f6aa0d713c88 ("drm/i915: Add Wa_22011802037 force cs halt")
Acked-by: Tvrtko Ursulin 
Reviewed-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_engine.h|  2 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 88 ++-
 .../drm/i915/gt/intel_execlists_submission.c  |  7 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 81 ++---
 5 files changed, 103 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs 
*engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f0acf8518a51..b3dc32fe6c51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1375,10 +1375,10 @@ static int __intel_engine_stop_cs(struct 
intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
/*
-* Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+* Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 * stopped, set ring stop bit and prefetch disable bit to halt CS
 */
-   if (GRAPHICS_VER(engine->i915) == 12)
+   if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
  
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
 
@@ -1401,6 +1401,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
 
ENGINE_TRACE(engine, "\n");
+   /*
+* TODO: Find out why occasionally stopping the CS times out. Seen
+* especially with gem_eio tests.
+*
+* Occasionally trying to stop the cs times out, but does not adversely
+* affect functionality. The timeout is set as a config parameter that
+* defaults to 100ms. In most cases the follow up operation is to wait
+* for pending MI_FORCE_WAKES. The assumption is that this timeout is
+* sufficient for any pending MI_FORCEWAKEs to complete. Once root
+* caused, the caller must check and handle the return from this
+* function.
+*/
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
 "timed out on STOP_RING -> IDLE; HEAD:%04x, 
TAIL:%04x\n",
@@ -1427,6 +1439,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+   static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+   [RCS0] = MSG_IDLE_CS,
+   [BCS0] = MSG_IDLE_BCS,
+   [VCS0] = MSG_IDLE_VCS0,
+   [VCS1] = MSG_IDLE_VCS1,
+   [VCS2] = MSG_IDLE_VCS2,
+   [VCS3] = MSG_IDLE_VCS3,
+   [VCS4] = MSG_IDLE_VCS4,
+   [VCS5] = MSG_IDLE_VCS5,
+   [VCS6] = MSG_IDLE_VCS6,
+   [VCS7] = MSG_IDLE_VCS7,
+   [VECS0] = MSG_IDLE_VECS0,
+   [VECS1] = MSG_IDLE_VECS1,
+   [VECS2] = MSG_IDLE_VECS2,
+   [VECS3] = MSG_IDLE_VECS3,
+   [CCS0] = MSG_IDLE_CS,
+   [CCS1] = MSG_IDLE_CS,
+   [CCS2] = MSG_IDLE_CS,
+   [CCS3] = MSG_IDLE_CS,
+   };
+   u32 val;
+
+   if (!_reg[engine->id].reg) {
+   drm_err(>i915->drm,
+   "MSG IDLE undefined for engine id %u\n", engine->id);
+   return 0;
+   }
+
+   val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+   /* bits[29:25] & bits[13:9] >> shift */
+   return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+   int ret;
+
+   /* Ensure GPM receives fw 

[Intel-gfx] [PATCH] drm/i915/reset: Add additional steps for Wa_22011802037 for execlist backend

2022-05-10 Thread Nerlige Ramappa, Umesh
From: Umesh Nerlige Ramappa 

For execlists backend, current implementation of Wa_22011802037 is to
stop the CS before doing a reset of the engine. This WA was further
extended to wait for any pending MI FORCE WAKEUPs before issuing a
reset. Add the extended steps in the execlist path of reset.

In addition, extend the WA to gen11.

v2: (Tvrtko)
- Clarify comments, commit message, fix typos
- Use IS_GRAPHICS_VER for gen 11/12 checks

Signed-off-by: Umesh Nerlige Ramappa 
Fixes: f6aa0d713c88 ("drm/i915: Add Wa_22011802037 force cs halt")
---
 drivers/gpu/drm/i915/gt/intel_engine.h|  2 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 85 ++-
 .../drm/i915/gt/intel_execlists_submission.c  |  7 ++
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 81 ++
 6 files changed, 107 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs 
*engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 14c6ddbbfde8..9943cf9655b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1282,10 +1282,10 @@ static int __intel_engine_stop_cs(struct 
intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
/*
-* Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+* Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 * stopped, set ring stop bit and prefetch disable bit to halt CS
 */
-   if (GRAPHICS_VER(engine->i915) == 12)
+   if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
  
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
 
@@ -1308,6 +1308,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
 
ENGINE_TRACE(engine, "\n");
+   /*
+* TODO: Find out why occasionally stopping the CS times out. Seen
+* especially with gem_eio tests.
+*
+* Occasionally trying to stop the cs times out, but does not adversely
+* affect functionality. The timeout is set as a config parameter that
+* defaults to 100ms. In most cases the follow up operation is to wait
+* for pending MI_FORCE_WAKES. The assumption is that this timeout is
+* sufficient for any pending MI_FORCEWAKEs to complete. Once root
+* caused, the caller must check and handle the return from this
+* function.
+*/
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
 "timed out on STOP_RING -> IDLE; HEAD:%04x, 
TAIL:%04x\n",
@@ -1334,6 +1346,75 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+   static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+   [RCS0] = MSG_IDLE_CS,
+   [BCS0] = MSG_IDLE_BCS,
+   [VCS0] = MSG_IDLE_VCS0,
+   [VCS1] = MSG_IDLE_VCS1,
+   [VCS2] = MSG_IDLE_VCS2,
+   [VCS3] = MSG_IDLE_VCS3,
+   [VCS4] = MSG_IDLE_VCS4,
+   [VCS5] = MSG_IDLE_VCS5,
+   [VCS6] = MSG_IDLE_VCS6,
+   [VCS7] = MSG_IDLE_VCS7,
+   [VECS0] = MSG_IDLE_VECS0,
+   [VECS1] = MSG_IDLE_VECS1,
+   [VECS2] = MSG_IDLE_VECS2,
+   [VECS3] = MSG_IDLE_VECS3,
+   [CCS0] = MSG_IDLE_CS,
+   [CCS1] = MSG_IDLE_CS,
+   [CCS2] = MSG_IDLE_CS,
+   [CCS3] = MSG_IDLE_CS,
+   };
+   u32 val;
+
+   if (!_reg[engine->id].reg)
+   return 0;
+
+   val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+   /* bits[29:25] & bits[13:9] >> shift */
+   return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+   int ret;
+
+   /* Ensure GPM receives fw up/down after