The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-dle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed conyext state tonot assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Michal Winiarski <michal.winiar...@intel.com>
Cc: Mika Kuoppala <mika.kuopp...@intel.com>
Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz....@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 113 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
                ((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
                ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+               ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-       return HAS_LOGICAL_RING_PREEMPTION(i915);
+       return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+              !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
        GEN(11), \
        .ddb_size = 2048, \
        .has_csr = 0, \
-       .has_logical_ring_elsq = 1
+       .has_logical_ring_elsq = 1, \
+       .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
        GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
        func(has_logical_ring_contexts); \
        func(has_logical_ring_elsq); \
        func(has_logical_ring_preemption); \
+       func(has_hw_preempt_to_idle); \
        func(has_overlay); \
        func(has_pooled_eu); \
        func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, 
struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
-       struct intel_context *ce =
-               to_intel_context(engine->i915->preempt_context, engine);
-       unsigned int n;
-
-       GEM_BUG_ON(execlists->preempt_complete_status !=
-                  upper_32_bits(ce->lrc_desc));
-       GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-                   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
-                  _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                                     CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-       /*
-        * Switch to our empty preempt context so
-        * the state of the GPU is known (idle).
-        */
        GEM_TRACE("%s\n", engine->name);
-       for (n = execlists_num_ports(execlists); --n; )
-               write_desc(execlists, 0, n);
+       if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+               /*
+                * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+                * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+                */
+               GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-       write_desc(execlists, ce->lrc_desc, n);
+               /*
+                * If we have hardware preempt-to-idle, we do not need to
+                * inject any job to the hardware. We only set a flag.
+                */
+               writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+       } else {
+               struct intel_context *ce =
+                       to_intel_context(engine->i915->preempt_context, engine);
+               unsigned int n;
 
-       /* we need to manually load the submit queue */
-       if (execlists->ctrl_reg)
-               writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+               GEM_BUG_ON(execlists->preempt_complete_status !=
+                          upper_32_bits(ce->lrc_desc));
+               GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+                           
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                                              
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+                          
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                                             
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+               /*
+                * Switch to our empty preempt context so
+                * the state of the GPU is known (idle).
+                */
+               for (n = execlists_num_ports(execlists); --n; )
+                       write_desc(execlists, 0, n);
+
+               write_desc(execlists, ce->lrc_desc, n);
+
+               /* we need to manually load the submit queue */
+               if (execlists->ctrl_reg)
+                       writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+       }
 
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
        execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
                                  status, buf[2*head + 1],
                                  execlists->active);
 
-                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                                     GEN8_CTX_STATUS_PREEMPTED))
-                               execlists_set_active(execlists,
-                                                    EXECLISTS_ACTIVE_HWACK);
-                       if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                               execlists_clear_active(execlists,
-                                                      EXECLISTS_ACTIVE_HWACK);
+                       /*
+                        * Check if preempted from idle to idle directly.
+                        * The STATUS_IDLE_ACTIVE flag is used to mark
+                        * such transition.
+                        */
+                       if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                            (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+                               /* Cannot be waiting for HWACK while HW is idle 
*/
+                               GEM_BUG_ON(execlists_is_active(execlists,
+                                                     EXECLISTS_ACTIVE_HWACK));
 
-                       if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                               continue;
+                               /*
+                                * We could not have COMPLETED anything
+                                * if we were idle before preemption.
+                                */
+                               GEM_BUG_ON(status & 
GEN8_CTX_STATUS_COMPLETED_MASK);
+                       } else {
+                               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                                             GEN8_CTX_STATUS_PREEMPTED))
+                                       execlists_set_active(execlists,
+                                                            
EXECLISTS_ACTIVE_HWACK);
+
+                               if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                                       execlists_clear_active(execlists,
+                                                              
EXECLISTS_ACTIVE_HWACK);
+
+                               if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                                       continue;
+
+                               /* We should never get a COMPLETED | 
IDLE_ACTIVE! */
+                               GEM_BUG_ON(status & 
GEN8_CTX_STATUS_IDLE_ACTIVE);
+                       }
 
-                       /* We should never get a COMPLETED | IDLE_ACTIVE! */
-                       GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-                       if (status & GEN8_CTX_STATUS_COMPLETE &&
-                           buf[2*head + 1] == 
execlists->preempt_complete_status) {
+                       /*
+                        * Check if preempted to real idle, either directly or
+                        * the preemptive context already finished executing
+                        */
+                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                           (status & GEN8_CTX_STATUS_COMPLETE &&
+                           buf[2*head + 1] == 
execlists->preempt_complete_status)) {
                                GEM_TRACE("%s preempt-idle\n", engine->name);
                                complete_preempt_context(execlists);
                                continue;
@@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct 
intel_engine_cs *engine)
        engine->unpark = NULL;
 
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-       if (engine->i915->preempt_context)
+       if (engine->i915->preempt_context ||
+           HAS_HW_PREEMPT_TO_IDLE(engine->i915))
                engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)      _MMIO((engine)->mmio_base + 
0x510)
 #define RING_EXECLIST_CONTROL(engine)          _MMIO((engine)->mmio_base + 
0x550)
 #define          EL_CTRL_LOAD                          (1 << 0)
+#define          EL_CTRL_PREEMPT_TO_IDLE               (1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to