This is a performance optimization that reduces the number of MMIO traps
from 4 to 1 during ELSP port writes (context submission).

During context submission, the elsp_data[4] values are cached in the
shared page, and only the last port write (elsp_data[0]) is trapped
to gvt for the real context submission.

Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse,
introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in the engine structure. Implemented pv versions
of submit_ports and preempt_context.
v3:
1. to further reduce code duplication, refactored the code and replaced
the 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
in the engine structure. Implemented a pv version of write_desc.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.

Cc: Zhenyu Wang <zhen...@linux.intel.com>
Cc: Zhi Wang <zhi.a.w...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: He Min <min...@intel.com>
Cc: Jiang Fei <fei.ji...@intel.com>
Cc: Gong Zhipeng <zhipeng.g...@intel.com>
Cc: Yuan Hang <hang.y...@intel.com>
Cc: Zhiyuan Lv <zhiyuan...@intel.com>
Signed-off-by: Xiaolin Zhang <xiaolin.zh...@intel.com>
---
 drivers/gpu/drm/i915/i915_pvinfo.h      |  1 +
 drivers/gpu/drm/i915/i915_vgpu.c        |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c        | 33 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index aa5eebc..3da644d 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,6 +49,7 @@ enum vgt_g2v_type {
        VGT_G2V_EXECLIST_CONTEXT_CREATE,
        VGT_G2V_EXECLIST_CONTEXT_DESTROY,
        VGT_G2V_SHARED_PAGE_SETUP,
+       VGT_G2V_ELSP_SUBMIT,
        VGT_G2V_MAX,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 63f70bf..82120f6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -85,6 +85,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
                return;
        }
 
+       dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
        /* If guest wants to enable pvmmio, it needs to enable it explicitly
         * through vgt_if interface, and then read back the enable state from
         * gvt layer.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..660e24c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -391,8 +391,10 @@ static u64 execlists_update_context(struct i915_request *rq)
        return ce->lrc_desc;
 }
 
-static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
+static inline void write_desc(struct intel_engine_cs *engine,
+                       u64 desc, u32 port)
 {
+       struct intel_engine_execlists *execlists = &engine->execlists;
        if (execlists->ctrl_reg) {
                writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
                writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
@@ -402,6 +404,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
        }
 }
 
+static inline void write_desc_pv(struct intel_engine_cs *engine,
+                       u64 desc, u32 port)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       u32 *elsp_data;
+
+       spin_lock(&engine->i915->vgpu.shared_page_lock);
+       elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+       elsp_data[engine->id * 4 + port * 2] = upper_32_bits(desc);
+       elsp_data[engine->id * 4 + port * 2 + 1] = lower_32_bits(desc);
+       if (port == 0) {
+               engine->i915->vgpu.shared_page->ring_id = engine->id;
+               __raw_i915_write32(dev_priv, vgtif_reg(g2v_notify),
+                               VGT_G2V_ELSP_SUBMIT);
+       }
+       spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
@@ -450,7 +470,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
                        desc = 0;
                }
 
-               write_desc(execlists, desc, n);
+               engine->write_desc(engine, desc, n);
        }
 
        /* we need to manually load the submit queue */
@@ -504,9 +524,9 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
         */
        GEM_TRACE("%s\n", engine->name);
        for (n = execlists_num_ports(execlists); --n; )
-               write_desc(execlists, 0, n);
+               engine->write_desc(engine, 0, n);
 
-       write_desc(execlists, ce->lrc_desc, n);
+       engine->write_desc(engine, ce->lrc_desc, n);
 
        /* we need to manually load the submit queue */
        if (execlists->ctrl_reg)
@@ -2134,6 +2154,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 
        engine->reset.prepare = execlists_reset_prepare;
 
+       engine->write_desc = write_desc;
+
+       if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT))
+               engine->write_desc = write_desc_pv;
+
        engine->park = NULL;
        engine->unpark = NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a..b752aab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -523,6 +523,9 @@ struct intel_engine_cs {
        void            (*irq_seqno_barrier)(struct intel_engine_cs *engine);
        void            (*cleanup)(struct intel_engine_cs *engine);
 
+       void            (*write_desc)(struct intel_engine_cs *engine,
+                                       u64 desc, u32 port);
+
        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal to
         *          RCS            VCS          BCS        VECS          VCS2
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to