Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-04-10 Thread Zhang, Xiaolin
On 03/30/2019 03:14 AM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> +   spin_lock(>i915->vgpu.shared_page_lock);
>> +   shared_page->ring_id = engine->id;
>> +   for (n = 0; n < execlists_num_ports(execlists); n++)
>> +   shared_page->descs[n] = descs[n];
>> +
>> +   __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
>> +   VGT_G2V_PV_SUBMISSION);
> There's no serialisation with the consumer here. The
> shared_page->descs[] will be clobbered by a second engine before they
> are read by pv. There should be a wait-for-ack here, or separate
> channels for each engine. In execlists, we avoid this by not dequeuing
> until after we get a response/ack from HW.
>
> Preemption? I guess preemption was turned off, or else you have to worry
> about the same engine overwriting the desc[] (if no ack).
> -Chris
>
Chris, thanks you to point it out my flaw here. will use the per-engine
channel for notification.  preemption is turned off and no support in
current version.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-04-10 Thread Zhang, Xiaolin
On 03/29/2019 11:40 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c 
>> b/drivers/gpu/drm/i915/i915_irq.c
>> index 2f78829..28e8ee0 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -37,6 +37,7 @@
>>  #include "i915_drv.h"
>>  #include "i915_trace.h"
>>  #include "intel_drv.h"
>> +#include "i915_vgpu.h"
>>  
>>  /**
>>   * DOC: interrupt handling
>> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, 
>> u32 iir)
>> if (iir & GT_RENDER_USER_INTERRUPT) {
>> intel_engine_breadcrumbs_irq(engine);
>> tasklet |= USES_GUC_SUBMISSION(engine->i915);
>> +   tasklet |= USES_PV_SUBMISSION(engine->i915);
> We should move this to an engine->flag.
sure, I will remove this next version.
>
>> }
>>  
>> if (tasklet)
>> +static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
>> +{
>> +   /*
>> +* We inherit a bunch of functions from execlists that we'd like
>> +* to keep using:
>> +*
>> +*engine->submit_request = execlists_submit_request;
>> +*engine->cancel_requests = execlists_cancel_requests;
>> +*engine->schedule = execlists_schedule;
>> +*
>> +* But we need to override the actual submission backend in order
>> +* to talk to the GVT with PV notification message.
>> +*/
>> +   intel_execlists_set_default_submission(engine);
>> +
>> +   engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
> You need to pin the breadcrumbs irq, or it will not fire on every
> request.
indeed, I should do. thanks your great point.
> I'd push for this to live in intel_pv_submission.c or
> intel_vgpu_submission.c
this will create new file and make a change  in Makefile. if this kind
of change is OK,
I will create one to use intel_pv_submission.c name.
>> @@ -401,6 +551,12 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private 
>> *dev_priv,
>> ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
>> ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
>> }
>> +
>> +   if (cap == PV_SUBMISSION) {
>> +   engine = (struct intel_engine_cs *)data;
>> +   engine->set_default_submission = 
>> vgpu_pv_set_default_submission;
>> +   engine->set_default_submission(engine);
>> +   }
>>  }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index c1b9780..0a66714 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2352,6 +2352,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
>> *engine)
>>  */
>> }
>> engine->emit_bb_start = gen8_emit_bb_start;
>> +
>> +   if (intel_vgpu_active(engine->i915))
>> +   intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, 
>> engine);
> That pair is ugly. Should clean up the engine initialisation so that it
> doesn't involve placing a chunk of code in a foreign class.
> -Chris
>
Yep, will move it to other location.


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-03-29 Thread Chris Wilson
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> +   spin_lock(>i915->vgpu.shared_page_lock);
> +   shared_page->ring_id = engine->id;
> +   for (n = 0; n < execlists_num_ports(execlists); n++)
> +   shared_page->descs[n] = descs[n];
> +
> +   __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
> +   VGT_G2V_PV_SUBMISSION);

There's no serialisation with the consumer here. The
shared_page->descs[] will be clobbered by a second engine before they
are read by pv. There should be a wait-for-ack here, or separate
channels for each engine. In execlists, we avoid this by not dequeuing
until after we get a response/ack from HW.

Preemption? I guess preemption was turned off, or else you have to worry
about the same engine overwriting the desc[] (if no ack).
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-03-29 Thread Chris Wilson
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2f78829..28e8ee0 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -37,6 +37,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include "i915_vgpu.h"
>  
>  /**
>   * DOC: interrupt handling
> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 
> iir)
> if (iir & GT_RENDER_USER_INTERRUPT) {
> intel_engine_breadcrumbs_irq(engine);
> tasklet |= USES_GUC_SUBMISSION(engine->i915);
> +   tasklet |= USES_PV_SUBMISSION(engine->i915);

We should move this to an engine->flag.

> }
>  
> if (tasklet)
> +static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
> +{
> +   /*
> +* We inherit a bunch of functions from execlists that we'd like
> +* to keep using:
> +*
> +*engine->submit_request = execlists_submit_request;
> +*engine->cancel_requests = execlists_cancel_requests;
> +*engine->schedule = execlists_schedule;
> +*
> +* But we need to override the actual submission backend in order
> +* to talk to the GVT with PV notification message.
> +*/
> +   intel_execlists_set_default_submission(engine);
> +
> +   engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;

You need to pin the breadcrumbs irq, or it will not fire on every
request.

I'd push for this to live in intel_pv_submission.c or
intel_vgpu_submission.c

> @@ -401,6 +551,12 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private 
> *dev_priv,
> ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
> ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
> }
> +
> +   if (cap == PV_SUBMISSION) {
> +   engine = (struct intel_engine_cs *)data;
> +   engine->set_default_submission = 
> vgpu_pv_set_default_submission;
> +   engine->set_default_submission(engine);
> +   }
>  }

> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index c1b9780..0a66714 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2352,6 +2352,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
> *engine)
>  */
> }
> engine->emit_bb_start = gen8_emit_bb_start;
> +
> +   if (intel_vgpu_active(engine->i915))
> +   intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, 
> engine);

That pair is ugly. Should clean up the engine initialisation so that it
doesn't involve placing a chunk of code in a foreign class.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-03-29 Thread Chris Wilson
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> It is performance optimization to override the actual submisison backend
> in order to eliminate execlists csb process and reduce mmio trap numbers
> for workload submission without contextswith interrupt by talking with
> GVT via PV submisison notification mechanism between guest and GVT.

> Use PV_SUBMISSION to control this level of pv optimization.
> 
> v0: RFC
> v1: rebase
> v2: added pv ops for pv context submission. to maximize code resuse,
> introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
> (set_default_submission) in engine structure. pv version of
> submit_ports and preempt_context implemented.
> v3:
> 1. to reduce more code duplication, code refactor and replaced 2 ops
> "submit_ports & preempt_contex" from v2 by 1 ops "write_desc"
> in engine structure. pv version of write_des implemented.
> 2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
> v4: implemented pv elsp submission tasklet as the backend workload
> submisison by talking to GVT with PV notificaiton mechanism and renamed
> VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISIION.
> 
> Signed-off-by: Xiaolin Zhang 
> ---
>  drivers/gpu/drm/i915/i915_irq.c|   2 +
>  drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
>  drivers/gpu/drm/i915/i915_vgpu.c   | 158 
> -
>  drivers/gpu/drm/i915/i915_vgpu.h   |  10 +++
>  drivers/gpu/drm/i915/intel_lrc.c   |   3 +
>  5 files changed, 173 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2f78829..28e8ee0 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -37,6 +37,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include "i915_vgpu.h"
>  
>  /**
>   * DOC: interrupt handling
> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 
> iir)
> if (iir & GT_RENDER_USER_INTERRUPT) {
> intel_engine_breadcrumbs_irq(engine);
> tasklet |= USES_GUC_SUBMISSION(engine->i915);
> +   tasklet |= USES_PV_SUBMISSION(engine->i915);

You call this an optimisation!
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-03-28 Thread Xiaolin Zhang
It is performance optimization to override the actual submisison backend
in order to eliminate execlists csb process and reduce mmio trap numbers
for workload submission without contextswith interrupt by talking with
GVT via PV submisison notification mechanism between guest and GVT.

Use PV_SUBMISSION to control this level of pv optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. to maximize code resuse,
introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in engine structure. pv version of
submit_ports and preempt_context implemented.
v3:
1. to reduce more code duplication, code refactor and replaced 2 ops
"submit_ports & preempt_contex" from v2 by 1 ops "write_desc"
in engine structure. pv version of write_des implemented.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
v4: implemented pv elsp submission tasklet as the backend workload
submisison by talking to GVT with PV notificaiton mechanism and renamed
VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISIION.

Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_irq.c|   2 +
 drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
 drivers/gpu/drm/i915/i915_vgpu.c   | 158 -
 drivers/gpu/drm/i915/i915_vgpu.h   |  10 +++
 drivers/gpu/drm/i915/intel_lrc.c   |   3 +
 5 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2f78829..28e8ee0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -37,6 +37,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "i915_vgpu.h"
 
 /**
  * DOC: interrupt handling
@@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 
iir)
if (iir & GT_RENDER_USER_INTERRUPT) {
intel_engine_breadcrumbs_irq(engine);
tasklet |= USES_GUC_SUBMISSION(engine->i915);
+   tasklet |= USES_PV_SUBMISSION(engine->i915);
}
 
if (tasklet)
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h 
b/drivers/gpu/drm/i915/i915_pvinfo.h
index 2408a9d..362d898 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -50,6 +50,7 @@ enum vgt_g2v_type {
VGT_G2V_PPGTT_L4_ALLOC,
VGT_G2V_PPGTT_L4_CLEAR,
VGT_G2V_PPGTT_L4_INSERT,
+   VGT_G2V_PV_SUBMISSION,
VGT_G2V_MAX,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 87a0ca5..53d05b3 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -23,6 +23,7 @@
 
 #include "intel_drv.h"
 #include "i915_vgpu.h"
+#include "intel_lrc_reg.h"
 
 /**
  * DOC: Intel GVT-g guest support
@@ -81,7 +82,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
dev_priv->vgpu.active = true;
 
/* guest driver PV capability */
-   dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
+   dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE | PV_SUBMISSION;
 
if (!intel_vgpu_check_pv_caps(dev_priv)) {
DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
@@ -292,6 +293,154 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
  * i915 vgpu PV support for Linux
  */
 
+static u64 execlists_update_context(struct i915_request *rq)
+{
+   struct intel_context *ce = rq->hw_context;
+   u32 *reg_state = ce->lrc_reg_state;
+
+   reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
+
+   return ce->lrc_desc;
+}
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+   return rb_entry(rb, struct i915_priolist, node);
+}
+
+static void pv_submit(struct intel_engine_cs *engine)
+{
+   struct intel_uncore *uncore = >i915->uncore;
+   struct intel_engine_execlists * const execlists = >execlists;
+   struct execlist_port *port = execlists->port;
+   unsigned int n;
+   struct gvt_shared_page *shared_page = engine->i915->vgpu.shared_page;
+   u64 descs[2];
+
+   for (n = 0; n < execlists_num_ports(execlists); n++) {
+   struct i915_request *rq;
+   unsigned int count = 0;
+
+   descs[n] = 0;
+   rq = port_unpack([n], );
+   if (rq && count == 0) {
+   port_set([n], port_pack(rq, ++count));
+   descs[n] = execlists_update_context(rq);
+   }
+   }
+
+   spin_lock(>i915->vgpu.shared_page_lock);
+   shared_page->ring_id = engine->id;
+   for (n = 0; n < execlists_num_ports(execlists); n++)
+   shared_page->descs[n] = descs[n];
+
+   __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
+   VGT_G2V_PV_SUBMISSION);
+   spin_unlock(>i915->vgpu.shared_page_lock);
+}
+
+static void pv_dequeue(struct intel_engine_cs *engine)
+{
+   struct intel_engine_execlists * const execlists = >execlists;
+   struct