[Intel-gfx] [PATCH 12/27] drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H
While debugging an issue with full GT resets I went down a rabbit hole thinking the scrubbing of lost G2H wasn't working correctly. This proved to be incorrect as this was working just fine but this chase inspired me to write a selftest to prove that this works. This simple selftest injects errors dropping various G2H and then issues a full GT reset proving that the scrubbing of these G2H doesn't blow up. v2: (Daniel Vetter) - Use ifdef instead of macros for selftests v3: (Checkpatch) - A space after 'switch' statement v4: (Daniele) - A comment saying GT won't idle if G2H are lost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 127 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../i915/selftests/intel_scheduler_helpers.c | 12 ++ .../i915/selftests/intel_scheduler_helpers.h | 2 + 6 files changed, 185 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3a73f3117873 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -198,6 +198,24 @@ struct intel_context { */ u8 guc_prio; u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** +* @drop_schedule_enable: Force drop of schedule enable G2H for selftest +*/ + bool drop_schedule_enable; + + /** +* @drop_schedule_disable: Force drop of schedule disable G2H for +* selftest +*/ + bool drop_schedule_disable; + + /** +* @drop_deregister: Force drop of deregister G2H for selftest +*/ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 56f11accd6cc..5844bb954922 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2645,6 +2645,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = >engine->gt->i915->runtime_pm; @@ -2699,10 +2706,24 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -3079,3 +3100,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index ..fb0e4a7bd8ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, +struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(>submit, + >fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int
Re: [Intel-gfx] [PATCH 12/27] drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H
On 8/18/2021 11:16 PM, Matthew Brost wrote: While debugging an issue with full GT resets I went down a rabbit hole thinking the scrubbing of lost G2H wasn't working correctly. This proved to be incorrect as this was working just fine but this chase inspired me to write a selftest to prove that this works. This simple selftest injects errors dropping various G2H and then issues a full GT reset proving that the scrubbing of these G2H doesn't blow up. v2: (Daniel Vetter) - Use ifdef instead of macros for selftests v3: (Checkpatch) - A space after 'switch' statement Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 126 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../i915/selftests/intel_scheduler_helpers.c | 12 ++ .../i915/selftests/intel_scheduler_helpers.h | 2 + 6 files changed, 184 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3a73f3117873 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -198,6 +198,24 @@ struct intel_context { */ u8 guc_prio; u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** +* @drop_schedule_enable: Force drop of schedule enable G2H for selftest +*/ + bool drop_schedule_enable; + + /** +* @drop_schedule_disable: Force drop of schedule disable G2H for +* selftest +*/ + bool drop_schedule_disable; + + /** +* @drop_deregister: Force drop of deregister G2H for selftest +*/ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e53a4ef7d442..e0e85e4ad512 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2635,6 +2635,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = >engine->gt->i915->runtime_pm; @@ -2689,10 +2696,24 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -3069,3 +3090,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index ..264e2f705c17 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, +struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(>submit, + >fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct
[Intel-gfx] [PATCH 12/27] drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H
While debugging an issue with full GT resets I went down a rabbit hole thinking the scrubbing of lost G2H wasn't working correctly. This proved to be incorrect as this was working just fine but this chase inspired me to write a selftest to prove that this works. This simple selftest injects errors dropping various G2H and then issues a full GT reset proving that the scrubbing of these G2H doesn't blow up. v2: (Daniel Vetter) - Use ifdef instead of macros for selftests v3: (Checkpatch) - A space after 'switch' statement Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 126 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../i915/selftests/intel_scheduler_helpers.c | 12 ++ .../i915/selftests/intel_scheduler_helpers.h | 2 + 6 files changed, 184 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3a73f3117873 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -198,6 +198,24 @@ struct intel_context { */ u8 guc_prio; u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** +* @drop_schedule_enable: Force drop of schedule enable G2H for selftest +*/ + bool drop_schedule_enable; + + /** +* @drop_schedule_disable: Force drop of schedule disable G2H for +* selftest +*/ + bool drop_schedule_disable; + + /** +* @drop_deregister: Force drop of deregister G2H for selftest +*/ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e53a4ef7d442..e0e85e4ad512 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2635,6 +2635,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = >engine->gt->i915->runtime_pm; @@ -2689,10 +2696,24 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -3069,3 +3090,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index ..264e2f705c17 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, +struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(>submit, + >fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + int i; +