Ping review, thanks very much. 

BRs, Xiaolin 

-----Original Message-----
From: Zhang, Xiaolin 
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
<xiaolin.zh...@intel.com>; Zhenyu Wang <zhen...@linux.intel.com>; Wang, Zhi A 
<zhi.a.w...@intel.com>; Chris Wilson <ch...@chris-wilson.co.uk>; Joonas 
Lahtinen <joonas.lahti...@linux.intel.com>; He; He, Min <min...@intel.com>; 
Jiang; Jiang, Fei <fei.ji...@intel.com>; Gong; Gong, Zhipeng 
<zhipeng.g...@intel.com>; Yuan; Yuan, Hang <hang.y...@intel.com>; Lv, Zhiyuan 
<zhiyuan...@intel.com>
Subject: [PATCH v2 3/5] drm/i915: context submission pvmmio optimization

This is a performance optimization to reduce the number of mmio traps from 4
to 1 during ELSP port writing (context submission).

During context submission, the elsp_data[4] values are cached in the shared
page, and only the last port write (elsp_data[0]) is trapped to gvt for the
real context submission.

Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse, introduced
2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in the engine structure. Implemented pv versions of
submit_ports and preempt_context.

Cc: Zhenyu Wang <zhen...@linux.intel.com>
Cc: Zhi Wang <zhi.a.w...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: He, Min <min...@intel.com>
Cc: Jiang, Fei <fei.ji...@intel.com>
Cc: Gong, Zhipeng <zhipeng.g...@intel.com>
Cc: Yuan, Hang <hang.y...@intel.com>
Cc: Zhiyuan Lv <zhiyuan...@intel.com>
Signed-off-by: Xiaolin Zhang <xiaolin.zh...@intel.com>
---
 drivers/gpu/drm/i915/i915_vgpu.c        |  2 +
 drivers/gpu/drm/i915/intel_lrc.c        | 88 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index cb409d5..9870ea6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 
        BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
+       dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
        magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
        if (magic != VGT_MAGIC)
                return;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 22b57b8..9e6ccf9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -460,6 +460,60 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);  }
 
+static void execlists_submit_ports_pv(struct intel_engine_cs *engine) {
+       struct intel_engine_execlists *execlists = &engine->execlists;
+       struct execlist_port *port = execlists->port;
+       u32 __iomem *elsp =
+               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+       u32 *elsp_data;
+       unsigned int n;
+       u32 descs[4];
+       int i = 0;
+
+       /*
+        * ELSQ note: the submit queue is not cleared after being submitted
+        * to the HW so we need to make sure we always clean it up. This is
+        * currently ensured by the fact that we always write the same number
+        * of elsq entries, keep this in mind before changing the loop below.
+        */
+       for (n = execlists_num_ports(execlists); n--; ) {
+               struct i915_request *rq;
+               unsigned int count;
+               u64 desc;
+
+               rq = port_unpack(&port[n], &count);
+               if (rq) {
+                       GEM_BUG_ON(count > !n);
+                       if (!count++)
+                               execlists_context_schedule_in(rq);
+                       port_set(&port[n], port_pack(rq, count));
+                       desc = execlists_update_context(rq);
+               } else {
+                       GEM_BUG_ON(!n);
+                       desc = 0;
+               }
+               GEM_BUG_ON(i >= 4);
+               descs[i] = upper_32_bits(desc);
+               descs[i + 1] = lower_32_bits(desc);
+               i += 2;
+       }
+
+       spin_lock(&engine->i915->vgpu.shared_page_lock);
+       elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+       *elsp_data = descs[0];
+       *(elsp_data + 1) = descs[1];
+       *(elsp_data + 2) = descs[2];
+       writel(descs[3], elsp);
+       spin_unlock(&engine->i915->vgpu.shared_page_lock);
+
+       /* we need to manually load the submit queue */
+       if (execlists->ctrl_reg)
+               writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); }
+
 static bool ctx_single_port_submission(const struct intel_context *ce)  {
        return (IS_ENABLED(CONFIG_DRM_I915_GVT) && @@ -497,7 +551,6 @@ static 
void inject_preempt_context(struct intel_engine_cs *engine)
 
        GEM_BUG_ON(execlists->preempt_complete_status !=
                   upper_32_bits(ce->lrc_desc));
-
        /*
         * Switch to our empty preempt context so
         * the state of the GPU is known (idle).
@@ -516,6 +569,27 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
        execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);  }
 
+static void inject_preempt_context_pv(struct intel_engine_cs *engine) {
+       struct intel_engine_execlists *execlists = &engine->execlists;
+       struct intel_context *ce =
+               to_intel_context(engine->i915->preempt_context, engine);
+       u32 __iomem *elsp =
+               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+       u32 *elsp_data;
+
+       GEM_BUG_ON(execlists->preempt_complete_status !=
+                  upper_32_bits(ce->lrc_desc));
+
+       spin_lock(&engine->i915->vgpu.shared_page_lock);
+       elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+       *elsp_data = 0;
+       *(elsp_data + 1) = 0;
+       *(elsp_data + 2) = upper_32_bits(ce->lrc_desc);
+       writel(lower_32_bits(ce->lrc_desc), elsp);
+       spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists) 
 {
        GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); 
@@ -583,7 +657,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                        return;
 
                if (need_preempt(engine, last, execlists->queue_priority)) {
-                       inject_preempt_context(engine);
+                       engine->preempt_context(engine);
                        return;
                }
 
@@ -705,7 +779,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
 
        if (submit) {
                port_assign(port, last);
-               execlists_submit_ports(engine);
+               engine->submit_ports(engine);
        }
 
        /* We must always keep the beast fed if we have work piled up */ @@ 
-2134,6 +2208,14 @@ void intel_execlists_set_default_submission(struct 
intel_engine_cs *engine)
 
        engine->reset.prepare = execlists_reset_prepare;
 
+       engine->preempt_context = inject_preempt_context;
+       engine->submit_ports = execlists_submit_ports;
+
+       if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
+               engine->preempt_context = inject_preempt_context_pv;
+               engine->submit_ports = execlists_submit_ports_pv;
+       }
+
        engine->park = NULL;
        engine->unpark = NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a..e9895bf 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -523,6 +523,9 @@ struct intel_engine_cs {
        void            (*irq_seqno_barrier)(struct intel_engine_cs *engine);
        void            (*cleanup)(struct intel_engine_cs *engine);
 
+       void            (*preempt_context)(struct intel_engine_cs *engine);
+       void            (*submit_ports)(struct intel_engine_cs *engine);
+
        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal 
to
         *          RCS            VCS          BCS        VECS          VCS2
--
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to