Re: [PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
Hi Andi, On 10/06/2024 13:10, Andi Shyti wrote: Hi Tvrtko, On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote: On 03/06/2024 17:20, Niemiec, Krzysztof wrote: The test is trying to push the heartbeat frequency to the limit, which might sometimes fail. Such a failure does not provide valuable information, because it does not indicate that there is something necessarily wrong with either the driver or the hardware. Remove the test to prevent random, unnecessary failures from appearing in CI. Suggested-by: Chris Wilson Signed-off-by: Niemiec, Krzysztof Just a note in passing that comma in the email display name is I believe not RFC 5322 compliant and there might be tools which barf on it(*). If you can put it in double quotes, it would be advisable. yes, we discussed it with Krzysztof, I noticed it right after I submitted the code. Regards, Tvrtko *) Such as my internal pull request generator which uses CPAN's Email::Address::XS. :) If we are in time, we can fix it as Krzysztof Niemiec Sorry about this oversight, It's not a big deal (it isn't the first and only occurrence) and no need to do anything more than correct the display name going forward. Regards, Tvrtko
Re: [PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
Hi Tvrtko, On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote: > On 03/06/2024 17:20, Niemiec, Krzysztof wrote: > > The test is trying to push the heartbeat frequency to the limit, which > > might sometimes fail. Such a failure does not provide valuable > > information, because it does not indicate that there is something > > necessarily wrong with either the driver or the hardware. > > > > Remove the test to prevent random, unnecessary failures from appearing > > in CI. > > > > Suggested-by: Chris Wilson > > Signed-off-by: Niemiec, Krzysztof > > Just a note in passing that comma in the email display name is I believe not > RFC 5322 compliant and there might be tools which barf on it(*). If you can > put it in double quotes, it would be advisable. yes, we discussed it with Krzysztof, I noticed it right after I submitted the code. > Regards, > > Tvrtko > > *) Such as my internal pull request generator which uses CPAN's > Email::Address::XS. :) If we are in time, we can fix it as Krzysztof Niemiec Sorry about this oversight, Andi
Re: [PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
On 03/06/2024 17:20, Niemiec, Krzysztof wrote: The test is trying to push the heartbeat frequency to the limit, which might sometimes fail. Such a failure does not provide valuable information, because it does not indicate that there is something necessarily wrong with either the driver or the hardware. Remove the test to prevent random, unnecessary failures from appearing in CI. Suggested-by: Chris Wilson Signed-off-by: Niemiec, Krzysztof Just a note in passing that comma in the email display name is I believe not RFC 5322 compliant and there might be tools which barf on it(*). If you can put it in double quotes, it would be advisable. Regards, Tvrtko *) Such as my internal pull request generator which uses CPAN's Email::Address::XS. :) --- .../drm/i915/gt/selftest_engine_heartbeat.c | 110 -- 1 file changed, 110 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index ef014df4c4fc..9e4f0e417b3b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) return err; } -static int cmp_u32(const void *_a, const void *_b) -{ - const u32 *a = _a, *b = _b; - - return *a - *b; -} - -static int __live_heartbeat_fast(struct intel_engine_cs *engine) -{ - const unsigned int error_threshold = max(2u, jiffies_to_usecs(6)); - struct intel_context *ce; - struct i915_request *rq; - ktime_t t0, t1; - u32 times[5]; - int err; - int i; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - intel_engine_pm_get(engine); - - err = intel_engine_set_heartbeat(engine, 1); - if (err) - goto err_pm; - - for (i = 0; i < ARRAY_SIZE(times); i++) { - do { - /* Manufacture a tick */ - intel_engine_park_heartbeat(engine); - GEM_BUG_ON(engine->heartbeat.systole); - engine->serial++; /* pretend we are not idle! 
*/ - intel_engine_unpark_heartbeat(engine); - - flush_delayed_work(&engine->heartbeat.work); - if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat %d did not start\n", - engine->name, i); - err = -EINVAL; - goto err_pm; - } - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - } while (!rq); - - t0 = ktime_get(); - while (rq == READ_ONCE(engine->heartbeat.systole)) - yield(); /* work is on the local cpu! */ - t1 = ktime_get(); - - i915_request_put(rq); - times[i] = ktime_us_delta(t1, t0); - } - - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); - - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - times[0], - times[ARRAY_SIZE(times) - 1]); - - /* -* Ideally, the upper bound on min work delay would be something like -* 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we -* are, even with system_wq_highpri, at the mercy of the CPU scheduler -* and may be stuck behind some slow work for many millisecond. Such -* as our very own display workers. -*/ - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - error_threshold); - err = -EINVAL; - } - - reset_heartbeat(engine); -err_pm: - intel_engine_pm_put(engine); - intel_context_put(ce); - return err; -} - -static int live_heartbeat_fast(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* Check that the heartbeat ticks at the desired rate. */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) - return 0; - - for_each_engine(engine, gt, id) { - err = __live_heartbeat_fast(engine); - if (err) - break; - } - - return err; -} - static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; @@ -372,7 +263,6 @@ int
Re: [PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
Hi Krzysztof, On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote: > The test is trying to push the heartbeat frequency to the limit, which > might sometimes fail. Such a failure does not provide valuable > information, because it does not indicate that there is something > necessarily wrong with either the driver or the hardware. > > Remove the test to prevent random, unnecessary failures from appearing > in CI. > > Suggested-by: Chris Wilson > Signed-off-by: Niemiec, Krzysztof merged in drm-intel-gt-next. Thank you, Andi
Re: [PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
Hi Krzysztof, On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote: > The test is trying to push the heartbeat frequency to the limit, which > might sometimes fail. Such a failure does not provide valuable > information, because it does not indicate that there is something > necessarily wrong with either the driver or the hardware. > > Remove the test to prevent random, unnecessary failures from appearing > in CI. > > Suggested-by: Chris Wilson > Signed-off-by: Niemiec, Krzysztof Reviewed-by: Andi Shyti Thanks, Andi
[PATCH] drm/i915/gt: Delete the live_hearbeat_fast selftest
The test is trying to push the heartbeat frequency to the limit, which might sometimes fail. Such a failure does not provide valuable information, because it does not indicate that there is something necessarily wrong with either the driver or the hardware. Remove the test to prevent random, unnecessary failures from appearing in CI. Suggested-by: Chris Wilson Signed-off-by: Niemiec, Krzysztof --- .../drm/i915/gt/selftest_engine_heartbeat.c | 110 -- 1 file changed, 110 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index ef014df4c4fc..9e4f0e417b3b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) return err; } -static int cmp_u32(const void *_a, const void *_b) -{ - const u32 *a = _a, *b = _b; - - return *a - *b; -} - -static int __live_heartbeat_fast(struct intel_engine_cs *engine) -{ - const unsigned int error_threshold = max(2u, jiffies_to_usecs(6)); - struct intel_context *ce; - struct i915_request *rq; - ktime_t t0, t1; - u32 times[5]; - int err; - int i; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - intel_engine_pm_get(engine); - - err = intel_engine_set_heartbeat(engine, 1); - if (err) - goto err_pm; - - for (i = 0; i < ARRAY_SIZE(times); i++) { - do { - /* Manufacture a tick */ - intel_engine_park_heartbeat(engine); - GEM_BUG_ON(engine->heartbeat.systole); - engine->serial++; /* pretend we are not idle! 
*/ - intel_engine_unpark_heartbeat(engine); - - flush_delayed_work(&engine->heartbeat.work); - if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat %d did not start\n", - engine->name, i); - err = -EINVAL; - goto err_pm; - } - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - } while (!rq); - - t0 = ktime_get(); - while (rq == READ_ONCE(engine->heartbeat.systole)) - yield(); /* work is on the local cpu! */ - t1 = ktime_get(); - - i915_request_put(rq); - times[i] = ktime_us_delta(t1, t0); - } - - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); - - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - times[0], - times[ARRAY_SIZE(times) - 1]); - - /* -* Ideally, the upper bound on min work delay would be something like -* 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we -* are, even with system_wq_highpri, at the mercy of the CPU scheduler -* and may be stuck behind some slow work for many millisecond. Such -* as our very own display workers. -*/ - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - error_threshold); - err = -EINVAL; - } - - reset_heartbeat(engine); -err_pm: - intel_engine_pm_put(engine); - intel_context_put(ce); - return err; -} - -static int live_heartbeat_fast(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* Check that the heartbeat ticks at the desired rate. 
*/ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) - return 0; - - for_each_engine(engine, gt, id) { - err = __live_heartbeat_fast(engine); - if (err) - break; - } - - return err; -} - static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), - SUBTEST(live_heartbeat_fast), SUBTEST(live_heartbeat_off), }; int saved_hangcheck; -- 2.34.1