On 19/11/2019 00:24, Umesh Nerlige Ramappa wrote:
Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.

v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already
   applied by the caller.

v3: (Lionel)
- Move register initialization into the array
- Assume a valid oa_config in enable_metric_set

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>

Looks all good, thanks!


Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>

---
  drivers/gpu/drm/i915/i915_perf.c | 199 +++++++++++++++++--------------
  1 file changed, 112 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ca58502a67d8..ea911961fd8c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2080,20 +2080,12 @@ gen8_update_reg_state_unlocked(const struct 
intel_context *ce,
        u32 *reg_state = ce->lrc_reg_state;
        int i;
- if (IS_GEN(stream->perf->i915, 12)) {
-               u32 format = stream->oa_buffer.format;
+       reg_state[ctx_oactxctrl + 1] =
+               (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+               (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+               GEN8_OA_COUNTER_RESUME;
- reg_state[ctx_oactxctrl + 1] =
-                       (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-                       (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE 
: 0);
-       } else {
-               reg_state[ctx_oactxctrl + 1] =
-                       (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) 
|
-                       (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-                       GEN8_OA_COUNTER_RESUME;
-       }
-
-       for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
+       for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
                reg_state[ctx_flexeu0 + i * 2 + 1] =
                        oa_config_flex_reg(stream->oa_config, flex_regs[i]);
@@ -2226,34 +2218,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
        return err;
  }
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool 
enable)
  {
-       struct i915_request *rq;
-       u32 *cs;
-       int err = 0;
-
-       rq = i915_request_create(ce);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       cs = intel_ring_begin(rq, 4);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto out;
-       }
-
-       *cs++ = MI_LOAD_REGISTER_IMM(1);
-       *cs++ = 
i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
-       *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
-                             enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
-       *cs++ = MI_NOOP;
+       int err;
+       struct intel_context *ce = stream->pinned_ctx;
+       u32 format = stream->oa_buffer.format;
+       struct flex regs_context[] = {
+               {
+                       GEN8_OACTXCONTROL,
+                       stream->perf->ctx_oactxctrl_offset + 1,
+                       enable ? GEN8_OA_COUNTER_RESUME : 0,
+               },
+       };
+       /* Offsets in regs_lri are not used since this configuration is only
+        * applied using LRI. Initialize the correct offsets for posterity.
+        */
+#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
+       struct flex regs_lri[] = {
+               {
+                       GEN12_OAR_OACONTROL,
+                       GEN12_OAR_OACONTROL_OFFSET + 1,
+                       (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+                       (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+               },
+               {
+                       RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+                       CTX_CONTEXT_CONTROL,
+                       _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+                                     enable ?
+                                     GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+                                     0)
+               },
+       };
- intel_ring_advance(rq, cs);
+       /* Modify the context image of pinned context with regs_context*/
+       err = intel_context_lock_pinned(ce);
+       if (err)
+               return err;
-out:
-       i915_request_add(rq);
+       err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+       intel_context_unlock_pinned(ce);
+       if (err)
+               return err;
- return err;
+       /* Apply regs_lri using LRI with pinned context */
+       return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
  }
/*
@@ -2279,53 +2288,16 @@ static int gen12_emit_oar_config(struct intel_context 
*ce, bool enable)
   *   per-context OA state.
   *
   * Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
   */
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
-                                     const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+                                    struct flex *regs,
+                                    size_t num_regs)
  {
        struct drm_i915_private *i915 = stream->perf->i915;
-       /* The MMIO offsets for Flex EU registers aren't contiguous */
-       const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
-       struct flex regs[] = {
-               {
-                       GEN8_R_PWR_CLK_STATE,
-                       CTX_R_PWR_CLK_STATE,
-               },
-               {
-                       IS_GEN(i915, 12) ?
-                       GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
-                       stream->perf->ctx_oactxctrl_offset + 1,
-               },
-               { EU_PERF_CNTL0, ctx_flexeuN(0) },
-               { EU_PERF_CNTL1, ctx_flexeuN(1) },
-               { EU_PERF_CNTL2, ctx_flexeuN(2) },
-               { EU_PERF_CNTL3, ctx_flexeuN(3) },
-               { EU_PERF_CNTL4, ctx_flexeuN(4) },
-               { EU_PERF_CNTL5, ctx_flexeuN(5) },
-               { EU_PERF_CNTL6, ctx_flexeuN(6) },
-       };
-#undef ctx_flexeuN
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx, *cn;
-       size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
-       int i, err;
-
-       if (IS_GEN(i915, 12)) {
-               u32 format = stream->oa_buffer.format;
-
-               regs[1].value =
-                       (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-                       (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
-       } else {
-               regs[1].value =
-                       (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) 
|
-                       (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-                       GEN8_OA_COUNTER_RESUME;
-       }
-
-       for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
-               regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+       int err;
lockdep_assert_held(&stream->perf->lock); @@ -2355,7 +2327,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, array_size);
+               err = gen8_configure_context(ctx, regs, num_regs);
                if (err) {
                        i915_gem_context_put(ctx);
                        return err;
@@ -2380,7 +2352,7 @@ static int lrc_configure_all_contexts(struct 
i915_perf_stream *stream,
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, array_size);
+               err = gen8_modify_self(ce, regs, num_regs);
                if (err)
                        return err;
        }
@@ -2388,6 +2360,56 @@ static int lrc_configure_all_contexts(struct 
i915_perf_stream *stream,
        return 0;
  }
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+                                       const struct i915_oa_config *oa_config)
+{
+       struct flex regs[] = {
+               {
+                       GEN8_R_PWR_CLK_STATE,
+                       CTX_R_PWR_CLK_STATE,
+               },
+       };




+
+       return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+                                     const struct i915_oa_config *oa_config)
+{
+       /* The MMIO offsets for Flex EU registers aren't contiguous */
+       const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+       struct flex regs[] = {
+               {
+                       GEN8_R_PWR_CLK_STATE,
+                       CTX_R_PWR_CLK_STATE,
+               },
+               {
+                       GEN8_OACTXCONTROL,
+                       stream->perf->ctx_oactxctrl_offset + 1,
+               },
+               { EU_PERF_CNTL0, ctx_flexeuN(0) },
+               { EU_PERF_CNTL1, ctx_flexeuN(1) },
+               { EU_PERF_CNTL2, ctx_flexeuN(2) },
+               { EU_PERF_CNTL3, ctx_flexeuN(3) },
+               { EU_PERF_CNTL4, ctx_flexeuN(4) },
+               { EU_PERF_CNTL5, ctx_flexeuN(5) },
+               { EU_PERF_CNTL6, ctx_flexeuN(6) },
+       };
+#undef ctx_flexeuN
+       int i;
+
+       regs[1].value =
+               (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+               (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+               GEN8_OA_COUNTER_RESUME;
+
+       for (i = 2; i < ARRAY_SIZE(regs); i++)
+               regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+       return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
  static int gen8_enable_metric_set(struct i915_perf_stream *stream)
  {
        struct intel_uncore *uncore = stream->uncore;
@@ -2471,7 +2493,7 @@ static int gen12_enable_metric_set(struct 
i915_perf_stream *stream)
         * to make sure all slices/subslices are ON before writing to NOA
         * registers.
         */
-       ret = lrc_configure_all_contexts(stream, oa_config);
+       ret = gen12_configure_all_contexts(stream, oa_config);
        if (ret)
                return ret;
@@ -2481,8 +2503,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
         * requested this.
         */
        if (stream->ctx) {
-               ret = gen12_emit_oar_config(stream->pinned_ctx,
-                                           oa_config != NULL);
+               ret = gen12_configure_oar_context(stream, true);
                if (ret)
                        return ret;
        }
@@ -2516,11 +2537,11 @@ static void gen12_disable_metric_set(struct 
i915_perf_stream *stream)
        struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
-       lrc_configure_all_contexts(stream, NULL);
+       gen12_configure_all_contexts(stream, NULL);
/* disable the context save/restore or OAR counters */
        if (stream->ctx)
-               gen12_emit_oar_config(stream->pinned_ctx, false);
+               gen12_configure_oar_context(stream, false);
/* Make sure we disable noa to save power. */
        intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2862,7 +2883,11 @@ void i915_oa_init_reg_state(const struct intel_context 
*ce,
                return;
stream = engine->i915->perf.exclusive_stream;
-       if (stream)
+       /*
+        * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
+        * is already doing that, so nothing to be done for gen12 here.
+        */
+       if (stream && INTEL_GEN(stream->perf->i915) < 12)
                gen8_update_reg_state_unlocked(ce, stream);
  }


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to