On 25/01/2022 05:47, Jani Nikula wrote:
On Mon, 24 Jan 2022, Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com> 
wrote:
All timestamps returned by GuC for GuC PMU busyness are captured from
GUC PM TIMESTAMP. Since this timestamp does not tick when GuC goes idle,
kmd uses RING_TIMESTAMP to measure busyness of an engine with an active
context. In further stress testing, the MMIO read of the RING_TIMESTAMP
is seen to cause a rare hang. Resolve the issue by using gt specific
timestamp from PM which is in sync with the GuC PM timestamp.

Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to 
pmu")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.ale...@intel.com>
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  5 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 56 ++++++++++++++-----
  drivers/gpu/drm/i915/i915_reg.h               |  3 +-
  3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index d59bbf49d1c2..697d9d66acef 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -215,6 +215,11 @@ struct intel_guc {
                 * context usage for overflows.
                 */
                struct delayed_work work;
+
+               /**
+                * @shift: Right shift value for the gpm timestamp
+                */
+               u32 shift;
        } timestamp;
#ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1331ff91c5b0..66760f5df0c1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1150,23 +1150,51 @@ static void guc_update_engine_gt_clks(struct 
intel_engine_cs *engine)
        }
  }
-static void guc_update_pm_timestamp(struct intel_guc *guc,
-                                   struct intel_engine_cs *engine,
-                                   ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
  {
-       u32 gt_stamp_now, gt_stamp_hi;
+       intel_wakeref_t wakeref;
+       u32 reg, shift;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+       shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+               GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+       return 3 - shift;
+}
+
+static u64 gpm_timestamp(struct intel_gt *gt)
+{
+       u32 lo, hi, old_hi, loop = 0;
+
+       hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       do {
+               lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+               old_hi = hi;
+               hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       } while (old_hi != hi && loop++ < 2);
+
+       return ((u64)hi << 32) | lo;
+}

See intel_uncore_read64_2x32().

This was a good suggestion - since the patch was merged regardless I think a follow up cleanup is in order so we don't accumulate duplicated code.

Regards,

Tvrtko


+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       u32 gt_stamp_lo, gt_stamp_hi;
+       u64 gpm_ts;
lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
-       gt_stamp_now = intel_uncore_read(engine->uncore,
-                                        RING_TIMESTAMP(engine->mmio_base));
+       gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+       gt_stamp_lo = lower_32_bits(gpm_ts);
        *now = ktime_get();
- if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+       if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
                gt_stamp_hi++;
- guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
  }
/*
@@ -1210,7 +1238,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs 
*engine, ktime_t *now)
                stats_saved = *stats;
                gt_stamp_saved = guc->timestamp.gt_stamp;
                guc_update_engine_gt_clks(engine);
-               guc_update_pm_timestamp(guc, engine, now);
+               guc_update_pm_timestamp(guc, now);
                intel_gt_pm_put_async(gt);
                if (i915_reset_count(gpu_error) != reset_count) {
                        *stats = stats_saved;
@@ -1242,8 +1270,8 @@ static void __reset_guc_busyness_stats(struct intel_guc 
*guc)
spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused);
        for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
                guc_update_engine_gt_clks(engine);
                engine->stats.guc.prev_total = 0;
        }
@@ -1260,10 +1288,11 @@ static void __update_guc_busyness_stats(struct 
intel_guc *guc)
        ktime_t unused;
spin_lock_irqsave(&guc->timestamp.lock, flags);
-       for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
+
+       guc_update_pm_timestamp(guc, &unused);
+       for_each_engine(engine, gt, id)
                guc_update_engine_gt_clks(engine);
-       }
+
        spin_unlock_irqrestore(&guc->timestamp.lock, flags);
  }
@@ -1757,6 +1786,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
        spin_lock_init(&guc->timestamp.lock);
        INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
        guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) 
* HZ;
+       guc->timestamp.shift = gpm_timestamp_shift(gt);
return 0;
  }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 23b7c6930453..655517eb90ab 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1848,7 +1848,8 @@
  #define BLT_HWS_PGA_GEN7      _MMIO(0x04280)
  #define VEBOX_HWS_PGA_GEN7    _MMIO(0x04380)
-#define GUCPMTIMESTAMP _MMIO(0xC3E8)
+#define MISC_STATUS0           _MMIO(0xA500)
+#define MISC_STATUS1           _MMIO(0xA504)
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)

Reply via email to