We are still investigating the detailed requirements here, but there are
some constraints we need to apply to unit-level clock gating to get
reliable metrics (in particular, a reliable sampling period).

Signed-off-by: Robert Bragg <rob...@sixbynine.org>
---
 drivers/gpu/drm/i915/i915_oa_perf.c | 70 +++++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_reg.h     |  3 ++
 2 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index d0dad5d..2a4121b 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -257,20 +257,46 @@ oa_buffer_destroy(struct drm_i915_private *i915)
 
 static void i915_oa_event_destroy(struct perf_event *event)
 {
-       struct drm_i915_private *i915 =
-               container_of(event->pmu, typeof(*i915), oa_pmu.pmu);
+       struct drm_i915_private *dev_priv =
+               container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
 
        WARN_ON(event->parent);
 
-       oa_buffer_destroy(i915);
+       I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
+                                 ~GEN6_RCZUNIT_CLOCK_GATE_DISABLE));
+       //I915_WRITE(GEN6_UCGCTL3, (I915_READ(GEN6_UCGCTL3) &
+       //                        ~GEN6_OACSUNIT_CLOCK_GATE_DISABLE));
+       I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
+                                   GEN7_DOP_CLOCK_GATE_ENABLE));
+
+       I915_WRITE(GEN7_ROW_CHICKEN2,
+                  _MASKED_BIT_DISABLE(DOP_CLOCK_GATING_DISABLE));
+
+       //if (IS_HSW_GT2(dev_priv->dev)) {
+       if (1) {
+               I915_WRITE(HSW_ROW_CHICKEN2_GT2,
+                          _MASKED_BIT_DISABLE(DOP_CLOCK_GATING_DISABLE));
+       }
+
+       if (IS_HSW_GT3(dev_priv->dev)) {
+               I915_WRITE(HSW_ROW_CHICKEN2_GT3_0,
+                          _MASKED_BIT_DISABLE(DOP_CLOCK_GATING_DISABLE));
+               I915_WRITE(HSW_ROW_CHICKEN2_GT3_1,
+                          _MASKED_BIT_DISABLE(DOP_CLOCK_GATING_DISABLE));
+       }
 
-       i915->oa_pmu.specific_ctx = NULL;
+       I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
+                                     ~GT_NOA_ENABLE));
+
+       oa_buffer_destroy(dev_priv);
+
+       dev_priv->oa_pmu.specific_ctx = NULL;
 
-       BUG_ON(i915->oa_pmu.exclusive_event != event);
-       i915->oa_pmu.exclusive_event = NULL;
+       BUG_ON(dev_priv->oa_pmu.exclusive_event != event);
+       dev_priv->oa_pmu.exclusive_event = NULL;
 
-       intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
-       intel_runtime_pm_put(i915);
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+       intel_runtime_pm_put(dev_priv);
 }
 
 static void *vmap_oa_buffer(struct drm_i915_gem_object *obj)
@@ -581,6 +607,32 @@ static int i915_oa_event_init(struct perf_event *event)
        BUG_ON(dev_priv->oa_pmu.exclusive_event);
        dev_priv->oa_pmu.exclusive_event = event;
 
+
+       I915_WRITE(GDT_CHICKEN_BITS, GT_NOA_ENABLE);
+
+       I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
+                                 GEN6_RCZUNIT_CLOCK_GATE_DISABLE));
+       //I915_WRITE(GEN6_UCGCTL3, (I915_READ(GEN6_UCGCTL3) |
+       //                        GEN6_OACSUNIT_CLOCK_GATE_DISABLE));
+       I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
+                                   ~GEN7_DOP_CLOCK_GATE_ENABLE));
+
+       I915_WRITE(GEN7_ROW_CHICKEN2,
+                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+       //if (IS_HSW_GT2(dev_priv->dev)) {
+       if (1) {
+               I915_WRITE(HSW_ROW_CHICKEN2_GT2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       }
+
+       if (IS_HSW_GT3(dev_priv->dev)) {
+               I915_WRITE(HSW_ROW_CHICKEN2_GT3_0,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+               I915_WRITE(HSW_ROW_CHICKEN2_GT3_1,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       }
+
        event->destroy = i915_oa_event_destroy;
 
        /* PRM - observability performance counters:
@@ -678,8 +730,6 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
        WARN_ONCE(I915_READ(GEN6_UCGCTL3) & GEN6_OACSUNIT_CLOCK_GATE_DISABLE,
                  "disabled OA unit level clock gating will result in incorrect per-context OA counters");
 
-       I915_WRITE(GDT_CHICKEN_BITS, GT_NOA_ENABLE);
-
        if (dev_priv->oa_pmu.metrics_set == I915_OA_METRICS_SET_3D) {
                config_oa_regs(dev_priv, hsw_profile_3d_mux_config,
                               ARRAY_SIZE(hsw_profile_3d_mux_config));
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d94932a..518b34c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7036,6 +7036,9 @@ enum skl_disp_power_wells {
 
 #define GEN7_ROW_CHICKEN2              0xe4f4
 #define GEN7_ROW_CHICKEN2_GT2          0xf4f4
+#define HSW_ROW_CHICKEN2_GT2           0xe5f4
+#define HSW_ROW_CHICKEN2_GT3_0         0xe6f4
+#define HSW_ROW_CHICKEN2_GT3_1         0xe7f4
 #define   DOP_CLOCK_GATING_DISABLE     (1<<0)
 
 #define HSW_ROW_CHICKEN3               0xe49c
-- 
2.3.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to