From: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>

In new version of GuC engine busyness, GuC provides engine busyness
ticks as a 64 bit counter. Add a new counter to relay this value to the
user as is.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>
Signed-off-by: John Harrison <john.c.harri...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 16 +++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 12 ++++
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |  1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 67 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_pmu.c               | 25 ++++++-
 drivers/gpu/drm/i915/i915_pmu.h               |  2 +-
 include/uapi/drm/i915_drm.h                   | 13 +++-
 8 files changed, 116 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index b58c30ac8ef02..57af7ec8ecd82 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -249,6 +249,7 @@ void intel_engine_dump_active_requests(struct list_head 
*requests,
 
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
                                   ktime_t *now);
+u64 intel_engine_get_busy_ticks(struct intel_engine_cs *engine);
 
 void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
                                  struct intel_context **ce, struct 
i915_request **rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 84a75c95f3f7d..1c9ffb1ae9889 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -2426,6 +2426,22 @@ ktime_t intel_engine_get_busy_time(struct 
intel_engine_cs *engine, ktime_t *now)
        return engine->busyness(engine, now);
 }
 
+/**
+ * intel_engine_get_busy_ticks() - Return current accumulated engine busyness
+ * ticks
+ * @engine: engine to report on
+ *
+ * Returns accumulated ticks @engine was busy since engine stats were enabled.
+ */
+u64 intel_engine_get_busy_ticks(struct intel_engine_cs *engine)
+{
+       if (!engine->busyness_ticks ||
+           !(engine->flags & I915_ENGINE_SUPPORTS_TICKS_STATS))
+               return 0;
+
+       return engine->busyness_ticks(engine);
+}
+
 struct intel_context *
 intel_engine_create_virtual(struct intel_engine_cs **siblings,
                            unsigned int count, unsigned long flags)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 40fd8f984d64b..a88d40c74d604 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -548,6 +548,11 @@ struct intel_engine_cs {
        ktime_t         (*busyness)(struct intel_engine_cs *engine,
                                    ktime_t *now);
 
+       /*
+        * Get engine busyness ticks
+        */
+       u64             (*busyness_ticks)(struct intel_engine_cs *engine);
+
        struct intel_engine_execlists execlists;
 
        /*
@@ -574,6 +579,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_EU_PRIORITY    BIT(10)
 #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
 #define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
+#define I915_ENGINE_SUPPORTS_TICKS_STATS   BIT(13)
        unsigned int flags;
 
        /*
@@ -649,6 +655,12 @@ intel_engine_supports_stats(const struct intel_engine_cs 
*engine)
        return engine->flags & I915_ENGINE_SUPPORTS_STATS;
 }
 
+static inline bool
+intel_engine_supports_tick_stats(const struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_SUPPORTS_TICKS_STATS;
+}
+
 static inline bool
 intel_engine_has_preemption(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index dcedff41a825f..69eb610b5ab0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -100,6 +100,7 @@ static void set_scheduler_caps(struct drm_i915_private 
*i915)
                MAP(HAS_PREEMPTION, PREEMPTION),
                MAP(HAS_SEMAPHORES, SEMAPHORES),
                MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
+               MAP(SUPPORTS_TICKS_STATS, ENGINE_BUSY_TICKS_STATS),
 #undef MAP
        };
        struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0c1fee5360777..71749fb9ad35b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1289,12 +1289,7 @@ static void busy_v1_guc_update_pm_timestamp(struct 
intel_guc *guc, ktime_t *now)
        guc->busy.v1.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
 }
 
-/*
- * Unlike the execlist mode of submission total and active times are in terms 
of
- * gt clocks. The *now parameter is retained to return the cpu time at which 
the
- * busyness was sampled.
- */
-static ktime_t busy_v1_guc_engine_busyness(struct intel_engine_cs *engine, 
ktime_t *now)
+static u64 __busy_v1_guc_engine_busyness_ticks(struct intel_engine_cs *engine, 
ktime_t *now_out)
 {
        struct intel_engine_guc_stats_v1 stats_saved, *stats = 
&engine->stats.guc_v1;
        struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
@@ -1304,6 +1299,7 @@ static ktime_t busy_v1_guc_engine_busyness(struct 
intel_engine_cs *engine, ktime
        unsigned long flags;
        u32 reset_count;
        bool in_reset;
+       ktime_t now;
 
        spin_lock_irqsave(&guc->busy.v1.lock, flags);
 
@@ -1318,7 +1314,7 @@ static ktime_t busy_v1_guc_engine_busyness(struct 
intel_engine_cs *engine, ktime
        reset_count = i915_reset_count(gpu_error);
        in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
 
-       *now = ktime_get();
+       now = ktime_get();
 
        /*
         * The active busyness depends on start_gt_clk and gt_stamp.
@@ -1334,7 +1330,7 @@ static ktime_t busy_v1_guc_engine_busyness(struct 
intel_engine_cs *engine, ktime
                 * start_gt_clk' calculation below for active engines.
                 */
                busy_v1_guc_update_engine_gt_clks(engine);
-               busy_v1_guc_update_pm_timestamp(guc, now);
+               busy_v1_guc_update_pm_timestamp(guc, &now);
                intel_gt_pm_put_async(gt);
                if (i915_reset_count(gpu_error) != reset_count) {
                        *stats = stats_saved;
@@ -1342,16 +1338,37 @@ static ktime_t busy_v1_guc_engine_busyness(struct 
intel_engine_cs *engine, ktime
                }
        }
 
-       total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
+       total = stats->total_gt_clks;
        if (stats->running) {
                u64 clk = guc->busy.v1.gt_stamp - stats->start_gt_clk;
 
-               total += intel_gt_clock_interval_to_ns(gt, clk);
+               total += clk;
        }
 
        spin_unlock_irqrestore(&guc->busy.v1.lock, flags);
 
-       return ns_to_ktime(total);
+       if (now_out)
+               *now_out = now;
+
+       return total;
+}
+
+/*
+ * Unlike the execlist mode of submission total and active times are in terms 
of
+ * gt clocks. The *now parameter is retained to return the cpu time at which 
the
+ * busyness was sampled.
+ */
+static ktime_t busy_v1_guc_engine_busyness(struct intel_engine_cs *engine, 
ktime_t *now)
+{
+       u64 ticks = __busy_v1_guc_engine_busyness_ticks(engine, now);
+       u64 ns = intel_gt_clock_interval_to_ns(engine->gt, ticks);
+
+       return ns_to_ktime(ns);
+}
+
+static u64 busy_v1_guc_engine_busyness_ticks(struct intel_engine_cs *engine)
+{
+       return __busy_v1_guc_engine_busyness_ticks(engine, NULL);
 }
 
 static void busy_v1_guc_enable_worker(struct intel_guc *guc)
@@ -1607,6 +1624,16 @@ static ktime_t busy_v2_guc_engine_busyness(struct 
intel_engine_cs *engine, ktime
        return ns_to_ktime(total);
 }
 
+static u64 busy_v2_guc_engine_busyness_ticks(struct intel_engine_cs *engine)
+{
+       struct intel_guc *guc = &engine->gt->uc.guc;
+       u64 ticks_engine;
+
+       __busy_v2_get_engine_usage_record(guc, engine, &ticks_engine, NULL, 
NULL);
+
+       return ticks_engine;
+}
+
 static u64 busy_v1_intel_guc_total_active_ticks(struct intel_guc *guc)
 {
        return guc->busy.v1.gt_stamp;
@@ -4480,12 +4507,20 @@ static void guc_default_vfuncs(struct intel_engine_cs 
*engine)
                engine->emit_flush = gen12_emit_flush_xcs;
        }
        engine->set_default_submission = guc_set_default_submission;
-       if (GUC_SUBMIT_VER(&engine->gt->uc.guc) < MAKE_GUC_VER(1, 3, 1))
-               engine->busyness = busy_v1_guc_engine_busyness;
-       else
-               engine->busyness = busy_v2_guc_engine_busyness;
+       if (GUC_SUBMIT_VER(&engine->gt->uc.guc) < MAKE_GUC_VER(1, 3, 1)) {
+               if (GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))
+                       engine->busyness = busy_v1_guc_engine_busyness;
+               engine->busyness_ticks = busy_v1_guc_engine_busyness_ticks;
+       } else {
+               if (GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))
+                       engine->busyness = busy_v2_guc_engine_busyness;
+               engine->busyness_ticks = busy_v2_guc_engine_busyness_ticks;
+       }
+
+       if (engine->busyness)
+               engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 
-       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+       engine->flags |= I915_ENGINE_SUPPORTS_TICKS_STATS;
        engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->flags |= I915_ENGINE_HAS_TIMESLICES;
 
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4f52636eb4a80..1b859556644f6 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -26,7 +26,8 @@
 #define ENGINE_SAMPLE_MASK \
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
-        BIT(I915_SAMPLE_SEMA))
+        BIT(I915_SAMPLE_SEMA) | \
+        BIT(I915_SAMPLE_BUSY_TICKS))
 
 static cpumask_t i915_pmu_cpumask;
 static unsigned int i915_pmu_target_cpu = -1;
@@ -161,8 +162,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu)
         * Also there is software busyness tracking available we do not
         * need the timer for I915_SAMPLE_BUSY counter.
         */
-       if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+       if ((i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) ||
+           (i915->caps.scheduler & 
I915_SCHEDULER_CAP_ENGINE_BUSY_TICKS_STATS)) {
                enable &= ~BIT(I915_SAMPLE_BUSY);
+               enable &= ~BIT(I915_SAMPLE_BUSY_TICKS);
+       }
 
        /*
         * If some bits remain it means we need the sampling timer running.
@@ -362,7 +366,8 @@ static void engine_sample(struct intel_engine_cs *engine, 
unsigned int period_ns
                add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
 
        /* No need to sample when busy stats are supported. */
-       if (intel_engine_supports_stats(engine))
+       if (intel_engine_supports_stats(engine) ||
+           intel_engine_supports_tick_stats(engine))
                return;
 
        /*
@@ -519,6 +524,13 @@ engine_event_status(struct intel_engine_cs *engine,
 {
        switch (sample) {
        case I915_SAMPLE_BUSY:
+               if (!intel_engine_supports_stats(engine))
+                       return -ENODEV;
+               break;
+       case I915_SAMPLE_BUSY_TICKS:
+               if (!intel_engine_supports_tick_stats(engine))
+                       return -ENODEV;
+               break;
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
@@ -651,6 +663,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
                        val = ktime_to_ns(intel_engine_get_busy_time(engine,
                                                                     &unused));
+               } else if (sample == I915_SAMPLE_BUSY_TICKS &&
+                          intel_engine_supports_tick_stats(engine)) {
+                       val = intel_engine_get_busy_ticks(engine);
                } else {
                        val = engine->pmu.sample[sample].cur;
                }
@@ -1000,6 +1015,7 @@ create_event_attributes(struct i915_pmu *pmu)
                __engine_event(I915_SAMPLE_BUSY, "busy"),
                __engine_event(I915_SAMPLE_SEMA, "sema"),
                __engine_event(I915_SAMPLE_WAIT, "wait"),
+               __engine_event(I915_SAMPLE_BUSY_TICKS, "busy-ticks"),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -1103,6 +1119,9 @@ create_event_attributes(struct i915_pmu *pmu)
                                                                
engine->uabi_instance,
                                                                
engine_events[i].sample));
 
+                       if (engine_events[i].sample == I915_SAMPLE_BUSY_TICKS)
+                               continue;
+
                        str = kasprintf(GFP_KERNEL, "%s-%s.unit",
                                        engine->name, engine_events[i].name);
                        if (!str)
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 41af038c37388..72a9c71944f8d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -49,7 +49,7 @@ enum {
        (I915_ENGINE_SAMPLE_COUNT + \
         I915_PMU_MAX_GT * __I915_PMU_TRACKED_EVENT_COUNT)
 
-#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
+#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_BUSY_TICKS + 1)
 
 struct i915_pmu_sample {
        u64 cur;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e26dd27ff4a5f..8ae98c1bda0ea 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -257,7 +257,8 @@ struct i915_engine_class_instance {
 enum drm_i915_pmu_engine_sample {
        I915_SAMPLE_BUSY = 0,
        I915_SAMPLE_WAIT = 1,
-       I915_SAMPLE_SEMA = 2
+       I915_SAMPLE_SEMA = 2,
+       I915_SAMPLE_BUSY_TICKS = 3,
 };
 
 #define I915_PMU_SAMPLE_BITS (4)
@@ -274,6 +275,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_BUSY(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
 
+#define I915_PMU_ENGINE_BUSY_TICKS(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY_TICKS)
+
 #define I915_PMU_ENGINE_WAIT(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
 
@@ -651,7 +655,14 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY  (1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION        (1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES        (1ul << 3)
+/*
+ * BUSY_STATS is deprecated on platforms with GuC based submission and will nt
+ * be available at all on newer platforms. It has accuracy issues due to the
+ * conversions from tick counts to wall time.
+ * BUSY_TICKS_STATS should be used instead.
+ */
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
+#define   I915_SCHEDULER_CAP_ENGINE_BUSY_TICKS_STATS   (1ul << 5)
 /*
  * Indicates the 2k user priority levels are statically mapped into 3 buckets 
as
  * follows:
-- 
2.41.0

Reply via email to