Re: [Intel-gfx] [PATCH v1 01/12] drm/i915: introduced vgpu pv capability

2020-09-20 Thread Zhang, Xiaolin
On Thu, 2020-09-10 at 16:10 +0300, Jani Nikula wrote:
> On Sat, 05 Sep 2020, Xiaolin Zhang  wrote:
> > to enable vgpu pv feature, pv capability is introduced for guest by
> > new pv_caps member in struct i915_virtual_gpu and for host GVT by
> > new pv_caps register in struct vgt_if.
> > 
> > both of them are used to control different pv feature support in
> > each
> > domain and the final pv caps runtime negotiated between guest and
> > host.
> > 
> > it also adds VGT_CAPS_PV capability BIT used by guest to query host
> > GVTg
> > whether it supports any PV feature or not.
> > 
> > Signed-off-by: Xiaolin Zhang 
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c |  3 ++
> >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> >  drivers/gpu/drm/i915/i915_pvinfo.h  |  5 ++-
> >  drivers/gpu/drm/i915/i915_vgpu.c| 63
> > -
> >  drivers/gpu/drm/i915/i915_vgpu.h| 10 ++
> >  5 files changed, 80 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7842199..fd1e0fc 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -48,6 +48,7 @@
> >  #include "i915_trace.h"
> >  #include "intel_pm.h"
> >  #include "intel_sideband.h"
> > +#include "i915_vgpu.h"
> >  
> >  static inline struct drm_i915_private *node_to_i915(struct
> > drm_info_node *node)
> >  {
> > @@ -60,6 +61,8 @@ static int i915_capabilities(struct seq_file *m,
> > void *data)
> > struct drm_printer p = drm_seq_file_printer(m);
> >  
> > seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915));
> > +   if (intel_vgpu_active(i915))
> > +   seq_printf(m, "vgpu pv_caps: 0x%x\n", i915-
> > >vgpu.pv_caps);
> 
> I think the placement here over-emphasizes the importance of the
> caps. Maybe you also want to print something if vgpu isn't active?
Thanks for the comment. Will consider how to print this. 
> 
> >  
> > intel_device_info_print_static(INTEL_INFO(i915), );
> > intel_device_info_print_runtime(RUNTIME_INFO(i915), );
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index a455752..16d1b51 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -808,6 +808,7 @@ struct i915_virtual_gpu {
> > struct mutex lock; /* serialises sending of g2v_notify command
> > pkts */
> > bool active;
> > u32 caps;
> > +   u32 pv_caps;
> >  };
> >  
> >  struct intel_cdclk_config {
> > diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h
> > b/drivers/gpu/drm/i915/i915_pvinfo.h
> > index 683e97a..8b0dc25 100644
> > --- a/drivers/gpu/drm/i915/i915_pvinfo.h
> > +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
> > @@ -57,6 +57,7 @@ enum vgt_g2v_type {
> >  #define VGT_CAPS_FULL_PPGTTBIT(2)
> >  #define VGT_CAPS_HWSP_EMULATIONBIT(3)
> >  #define VGT_CAPS_HUGE_GTT  BIT(4)
> > +#define VGT_CAPS_PVBIT(5)
> >  
> >  struct vgt_if {
> > u64 magic;  /* VGT_MAGIC */
> > @@ -109,7 +110,9 @@ struct vgt_if {
> > u32 execlist_context_descriptor_lo;
> > u32 execlist_context_descriptor_hi;
> >  
> > -   u32  rsv7[0x200 - 24];/* pad to one page */
> > +   u32 pv_caps;
> > +
> > +   u32  rsv7[0x200 - 25];/* pad to one page */
> >  } __packed;
> >  
> >  #define vgtif_offset(x) (offsetof(struct vgt_if, x))
> > diff --git a/drivers/gpu/drm/i915/i915_vgpu.c
> > b/drivers/gpu/drm/i915/i915_vgpu.c
> > index 70fca72..10960125 100644
> > --- a/drivers/gpu/drm/i915/i915_vgpu.c
> > +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> > @@ -98,7 +98,13 @@ void intel_vgpu_detect(struct drm_i915_private
> > *dev_priv)
> >  
> > dev_priv->vgpu.active = true;
> > mutex_init(_priv->vgpu.lock);
> > -   drm_info(_priv->drm, "Virtual GPU for Intel GVT-g
> > detected.\n");
> > +
> > +   if (!intel_vgpu_detect_pv_caps(dev_priv, shared_area)) {
> > +   DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
> > +   goto out;
> 
> Seems clearer without the goto. It's not like one is an error path,
> right?
> 
> > +   }
> > +
> > +   DRM_INFO("Virtual GPU for Intel GVT-g detected with PV
> > Optimized.\n");
> 
> Please retain use of drm_info().
> 
> >  
> >  out:
> > pci_iounmap(pdev, shared_area);
> > @@ -134,6 +140,18 @@ bool intel_vgpu_has_huge_gtt(struct
> > drm_i915_private *dev_priv)
> > return dev_priv->vgpu.caps & VGT_CAPS_HUGE_GTT;
> >  }
> >  
> > +static bool intel_vgpu_check_pv_cap(struct drm_i915_private
> > *dev_priv,
> > +   enum pv_caps cap)
> 
> The indentation is off here, and all over the place, as reported by
> checkpatch. Please address them everywhere.
> 
> > +{
> > +   return (dev_priv->vgpu.active && (dev_priv->vgpu.caps &
> > VGT_CAPS_PV)
> > +   && (dev_priv->vgpu.pv_caps & cap));
> > +}
> > +
> > +static bool intel_vgpu_has_pv_caps(struct drm_i915_private
> > *dev_priv)
> > +{
> > +   return 

Re: [Intel-gfx] [PATCH v1 03/12] drm/i915: vgpu pv command buffer transport protocol

2020-09-20 Thread Zhang, Xiaolin
On Thu, 2020-09-10 at 16:20 +0300, Jani Nikula wrote:
> On Sat, 05 Sep 2020, Xiaolin Zhang  wrote:
> > based on the common shared memory, vgpu pv command transport buffer
> > (CTB)
> > protocol is implemented which is a simple pv command buffer ring
> > with pv
> > command descriptor used to perform guest-2-gvt single direction
> > communication
> > between guest and host GVTg.
> > 
> > with this CTB, guest can send PV command with PV data to host to
> > perform PV
> > commands in host side.
> > 
> > Signed-off-by: Xiaolin Zhang 
> > ---
> >  drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
> >  drivers/gpu/drm/i915/i915_vgpu.c   | 195
> > -
> >  drivers/gpu/drm/i915/i915_vgpu.h   |  53 ++
> >  3 files changed, 247 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h
> > b/drivers/gpu/drm/i915/i915_pvinfo.h
> > index 1d44876..ded93c5 100644
> > --- a/drivers/gpu/drm/i915/i915_pvinfo.h
> > +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
> > @@ -49,6 +49,7 @@ enum vgt_g2v_type {
> > VGT_G2V_EXECLIST_CONTEXT_CREATE,
> > VGT_G2V_EXECLIST_CONTEXT_DESTROY,
> > VGT_G2V_SHARED_PAGE_REGISTER,
> > +   VGT_G2V_PV_SEND_TRIGGER,
> > VGT_G2V_MAX,
> >  };
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_vgpu.c
> > b/drivers/gpu/drm/i915/i915_vgpu.c
> > index 8b2b451..e856eff 100644
> > --- a/drivers/gpu/drm/i915/i915_vgpu.c
> > +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> > @@ -370,6 +370,183 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
> >   * i915 vgpu PV support for Linux
> >   */
> >  
> > +/**
> > + * wait_for_desc_update - Wait for the command buffer descriptor
> > update.
> > + * @desc:  buffer descriptor
> > + * @fence: response fence
> > + * @status:placeholder for status
> > + *
> > + * GVTg will update command buffer descriptor with new fence and
> > status
> > + * after processing the command identified by the fence. Wait for
> > + * specified fence and then read from the descriptor status of the
> > + * command.
> > + *
> > + * Return:
> > + * *   0 response received (status is valid)
> > + * *   -ETIMEDOUT no response within hardcoded timeout
> > + */
> > +static int wait_for_desc_update(struct vgpu_pv_ct_buffer_desc
> > *desc,
> > +   u32 fence, u32 *status)
> > +{
> > +   int err;
> > +
> > +#define done (READ_ONCE(desc->fence) == fence)
> > +   err = wait_for_us(done, 5);
> > +   if (err)
> > +   err = wait_for(done, 10);
> > +#undef done
> > +
> > +   if (unlikely(err)) {
> > +   DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
> > +   fence, desc->fence);
> 
> drm_err() please.
Sure. will change all the similar code. thanks. 
> 
> > +   }
> > +
> > +   *status = desc->status;
> 
> Please have a blank line before the return. Recommended throughout
> the
> series.
Sure, will do it in whole series. thanks. 
> 
> > +   return err;
> > +}
> > +
> > +/**
> > + * CTB Guest to GVT request
> > + *
> > + * Format of the CTB Guest to GVT request message is as follows::
> > + *
> > + *  ++-+-+-+-+
> > + *  |   msg[0]   |   [1]   |   [2]   |   ...   |  [n-1]  |
> > + *  ++-+-+-+-+
> > + *  |   MESSAGE  |   MESSAGE PAYLOAD |
> > + *  +   HEADER   +-+-+-+-+
> > + *  ||0|1|   ...   |n|
> > + *  ++=+=+=+=+
> > + *  |  len >= 1  |  FENCE  | request specific data   |
> > + *  +--+-+-+-+-+-+
> > + *
> > + *   ^-len---^
> > + */
> > +static int pv_command_buffer_write(struct i915_virtual_gpu_pv *pv,
> > +   const u32 *action, u32 len /* in dwords */, u32 fence)
> > +{
> > +   struct vgpu_pv_ct_buffer_desc *desc = pv->ctb.desc;
> > +   u32 head = desc->head / 4;  /* in dwords */
> > +   u32 tail = desc->tail / 4;  /* in dwords */
> > +   u32 size = desc->size / 4;  /* in dwords */
> > +   u32 used;   /* in dwords */
> > +   u32 header;
> > +   u32 *cmds = pv->ctb.cmds;
> > +   unsigned int i;
> > +
> > +   GEM_BUG_ON(desc->size % 4);
> > +   GEM_BUG_ON(desc->head % 4);
> > +   GEM_BUG_ON(desc->tail % 4);
> > +   GEM_BUG_ON(tail >= size);
> > +
> > +/* tail == head condition indicates empty */
> > +   if (tail < head)
> > +   used = (size - head) + tail;
> > +   else
> > +   used = tail - head;
> > +
> > +   /* make sure there is a space including extra dw for the fence
> > */
> > +   if (unlikely(used + len + 1 >= size))
> > +   return -ENOSPC;
> > +
> > +   /*
> > +* Write the message. The format is the following:
> > +* DW0: header (including action code)
> > +* DW1: fence
> > +* DW2+: action data
> > +*/
> > +   header = (len << 

Re: [Intel-gfx] [PATCH v1 02/12] drm/i915: vgpu shared memory setup for pv support

2020-09-20 Thread Zhang, Xiaolin
On Thu, 2020-09-10 at 16:16 +0300, Jani Nikula wrote:
> On Sat, 05 Sep 2020, Xiaolin Zhang  wrote:
> > To support vgpu pv features, a common shared memory is setup used
> > for
> > communication and data exchange between guest and host GVTg to
> > reduce
> > data access overhead and trap cost.
> > 
> > guest i915 will allocate this common memory (1 page size) and then
> > pass
> > its physical address to host GVTg through PVINFO register so that
> > host
> > GVTg can access this shared guest page memory without trap cost with
> > hypervisor's facility.
> > 
> > guest i915 will send VGT_G2V_SHARED_PAGE_SETUP notification to host
> > GVTg
> > once shared memory setup successfully finished.
> > 
> > the layout of the shared_page is also defined as well, the first part
> > is the
> > PV version information used for compatibility support.
> > 
> > Signed-off-by: Xiaolin Zhang 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c|  2 +
> >  drivers/gpu/drm/i915/i915_drv.h|  4 +-
> >  drivers/gpu/drm/i915/i915_pvinfo.h |  5 +-
> >  drivers/gpu/drm/i915/i915_vgpu.c   | 94
> > ++
> >  drivers/gpu/drm/i915/i915_vgpu.h   | 14 ++
> >  5 files changed, 117 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 00292a8..5fbb4ab 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1071,6 +1071,8 @@ static void i915_driver_release(struct
> > drm_device *dev)
> >  
> > disable_rpm_wakeref_asserts(rpm);
> >  
> > +   intel_vgpu_destroy(dev_priv);
> > +
> > i915_gem_driver_release(dev_priv);
> >  
> > intel_memory_regions_driver_release(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index 16d1b51..3cde2c5f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -809,7 +809,9 @@ struct i915_virtual_gpu {
> > bool active;
> > u32 caps;
> > u32 pv_caps;
> > -};
> > +
> > +   struct i915_virtual_gpu_pv *pv;
> > +} __packed;
> 
> I'm unsure why this struct should be packed.
Thanks your point it out. it is not necessary. will remove this next
version. 
> 
> >  
> >  struct intel_cdclk_config {
> > unsigned int cdclk, vco, ref, bypass;
> > diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h
> > b/drivers/gpu/drm/i915/i915_pvinfo.h
> > index 8b0dc25..1d44876 100644
> > --- a/drivers/gpu/drm/i915/i915_pvinfo.h
> > +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
> > @@ -48,6 +48,7 @@ enum vgt_g2v_type {
> > VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY,
> > VGT_G2V_EXECLIST_CONTEXT_CREATE,
> > VGT_G2V_EXECLIST_CONTEXT_DESTROY,
> > +   VGT_G2V_SHARED_PAGE_REGISTER,
> > VGT_G2V_MAX,
> >  };
> >  
> > @@ -112,7 +113,9 @@ struct vgt_if {
> >  
> > u32 pv_caps;
> >  
> > -   u32  rsv7[0x200 - 25];/* pad to one page */
> > +   u64 shared_page_gpa;
> > +
> > +   u32  rsv7[0x200 - 27];/* pad to one page */
> >  } __packed;
> >  
> >  #define vgtif_offset(x) (offsetof(struct vgt_if, x))
> > diff --git a/drivers/gpu/drm/i915/i915_vgpu.c
> > b/drivers/gpu/drm/i915/i915_vgpu.c
> > index 10960125..8b2b451 100644
> > --- a/drivers/gpu/drm/i915/i915_vgpu.c
> > +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> > @@ -110,6 +110,17 @@ void intel_vgpu_detect(struct drm_i915_private
> > *dev_priv)
> > pci_iounmap(pdev, shared_area);
> >  }
> >  
> > +void intel_vgpu_destroy(struct drm_i915_private *i915)
> > +{
> > +   struct i915_virtual_gpu_pv *pv = i915->vgpu.pv;
> > +
> > +   if (!intel_vgpu_active(i915) || !pv)
> > +   return;
> > +
> > +   __free_page(virt_to_page(pv->shared_page));
> > +   kfree(pv);
> > +}
> > +
> >  void intel_vgpu_register(struct drm_i915_private *i915)
> >  {
> > /*
> > @@ -360,6 +371,83 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
> >   */
> >  
> >  /*
> > + * shared_page setup for VGPU PV features
> > + */
> > +static int intel_vgpu_setup_shared_page(struct drm_i915_private
> > *i915,
> > +   void __iomem *shared_area)
> > +{
> > +   void __iomem *addr;
> > +   struct i915_virtual_gpu_pv *pv;
> > +   struct gvt_shared_page *base;
> > +   u64 gpa;
> > +   u16 ver_maj, ver_min;
> > +   int ret = 0;
> > +
> > +   /* We allocate 1 page shared between guest and GVT for data
> > exchange.
> > +*   ___
> > +*  |version|
> > +*  |___PAGE/8
> > +*  |   |
> > +*  |___PAGE/4
> > +*  |   |
> > +*  |   |
> > +*  |   |
> > +*  |___PAGE/2
> > +*  |   |
> > +*  |   |
> > +*  |   |
> > +*  |  

Re: [Intel-gfx] [PATCH v1 01/12] drm/i915: introduced vgpu pv capability

2020-09-20 Thread Zhang, Xiaolin
On Thu, 2020-09-10 at 16:10 +0300, Jani Nikula wrote:
> On Sat, 05 Sep 2020, Xiaolin Zhang  wrote:
> > to enable vgpu pv feature, pv capability is introduced for guest by
> > new pv_caps member in struct i915_virtual_gpu and for host GVT by
> > new pv_caps register in struct vgt_if.
> > 
> > both of them are used to control different pv feature support in
> > each
> > domain and the final pv caps runtime negotiated between guest and
> > host.
> > 
> > it also adds VGT_CAPS_PV capability BIT used by guest to query host
> > GVTg
> > whether it supports any PV feature or not.
> > 
> > Signed-off-by: Xiaolin Zhang 
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c |  3 ++
> >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> >  drivers/gpu/drm/i915/i915_pvinfo.h  |  5 ++-
> >  drivers/gpu/drm/i915/i915_vgpu.c| 63
> > -
> >  drivers/gpu/drm/i915/i915_vgpu.h| 10 ++
> >  5 files changed, 80 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7842199..fd1e0fc 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -48,6 +48,7 @@
> >  #include "i915_trace.h"
> >  #include "intel_pm.h"
> >  #include "intel_sideband.h"
> > +#include "i915_vgpu.h"
> 
> Please keep includes sorted.
Sure. thanks for review. 
> 
> >  
> >  static inline struct drm_i915_private *node_to_i915(struct
> > drm_info_node *node)
> >  {
> > @@ -60,6 +61,8 @@ static int i915_capabilities(struct seq_file *m,
> > void *data)
> > struct drm_printer p = drm_seq_file_printer(m);
> >  
> > seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915));
> > +   if (intel_vgpu_active(i915))
> > +   seq_printf(m, "vgpu pv_caps: 0x%x\n", i915-
> > >vgpu.pv_caps);
> >  
> > intel_device_info_print_static(INTEL_INFO(i915), );
> > intel_device_info_print_runtime(RUNTIME_INFO(i915), );
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index a455752..16d1b51 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -808,6 +808,7 @@ struct i915_virtual_gpu {
> > struct mutex lock; /* serialises sending of g2v_notify command
> > pkts */
> > bool active;
> > u32 caps;
> > +   u32 pv_caps;
> >  };
> >  
> >  struct intel_cdclk_config {
> > diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h
> > b/drivers/gpu/drm/i915/i915_pvinfo.h
> > index 683e97a..8b0dc25 100644
> > --- a/drivers/gpu/drm/i915/i915_pvinfo.h
> > +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
> > @@ -57,6 +57,7 @@ enum vgt_g2v_type {
> >  #define VGT_CAPS_FULL_PPGTTBIT(2)
> >  #define VGT_CAPS_HWSP_EMULATIONBIT(3)
> >  #define VGT_CAPS_HUGE_GTT  BIT(4)
> > +#define VGT_CAPS_PVBIT(5)
> >  
> >  struct vgt_if {
> > u64 magic;  /* VGT_MAGIC */
> > @@ -109,7 +110,9 @@ struct vgt_if {
> > u32 execlist_context_descriptor_lo;
> > u32 execlist_context_descriptor_hi;
> >  
> > -   u32  rsv7[0x200 - 24];/* pad to one page */
> > +   u32 pv_caps;
> > +
> > +   u32  rsv7[0x200 - 25];/* pad to one page */
> >  } __packed;
> >  
> >  #define vgtif_offset(x) (offsetof(struct vgt_if, x))
> > diff --git a/drivers/gpu/drm/i915/i915_vgpu.c
> > b/drivers/gpu/drm/i915/i915_vgpu.c
> > index 70fca72..10960125 100644
> > --- a/drivers/gpu/drm/i915/i915_vgpu.c
> > +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> > @@ -98,7 +98,13 @@ void intel_vgpu_detect(struct drm_i915_private
> > *dev_priv)
> >  
> > dev_priv->vgpu.active = true;
> > mutex_init(_priv->vgpu.lock);
> > -   drm_info(_priv->drm, "Virtual GPU for Intel GVT-g
> > detected.\n");
> > +
> > +   if (!intel_vgpu_detect_pv_caps(dev_priv, shared_area)) {
> > +   DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
> > +   goto out;
> > +   }
> > +
> > +   DRM_INFO("Virtual GPU for Intel GVT-g detected with PV
> > Optimized.\n");
> >  
> >  out:
> > pci_iounmap(pdev, shared_area);
> > @@ -134,6 +140,18 @@ bool intel_vgpu_has_huge_gtt(struct
> > drm_i915_private *dev_priv)
> > return dev_priv->vgpu.caps & VGT_CAPS_HUGE_GTT;
> >  }
> >  
> > +static bool intel_vgpu_check_pv_cap(struct drm_i915_private
> > *dev_priv,
> > +   enum pv_caps cap)
> > +{
> > +   return (dev_priv->vgpu.active && (dev_priv->vgpu.caps &
> > VGT_CAPS_PV)
> > +   && (dev_priv->vgpu.pv_caps & cap));
> > +}
> > +
> > +static bool intel_vgpu_has_pv_caps(struct drm_i915_private
> > *dev_priv)
> > +{
> > +   return dev_priv->vgpu.caps & VGT_CAPS_PV;
> > +}
> > +
> >  struct _balloon_info_ {
> > /*
> >  * There are up to 2 regions per mappable/unmappable graphic
> > @@ -336,3 +354,46 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
> > drm_err(_priv->drm, "VGT balloon fail\n");
> > return ret;
> >  }
> > +
> > +/*
> > + * i915 vgpu PV support for Linux
> > + */
> > +
> > +/*
> > + * 

Re: [Intel-gfx] [PATCH v2] drm/i915: to make vgpu ppgtt notification as atomic operation

2019-08-23 Thread Zhang, Xiaolin
On 08/23/2019 03:58 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-08-23 07:57:31)
>> vgpu ppgtt notification was split into 2 steps, the first step is to
>> update PVINFO's pdp register and then write PVINFO's g2v_notify register
>> with action code to trigger ppgtt notification to GVT side.
>>
>> currently these steps were not atomic operations due to no any protection,
>> so it is easy to enter race condition state during the MTBF, stress and
>> IGT test to cause GPU hang.
>>
>> the solution is to add a lock to make vgpu ppgtt notification as atomic
>> operation.
>>
>> Cc: sta...@vger.kernel.org
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h | 1 +
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 4 
>>  drivers/gpu/drm/i915/i915_vgpu.c| 1 +
>>  3 files changed, 6 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index eb31c16..32e17c4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking {
>>  };
>>  
>>  struct i915_virtual_gpu {
>> +   struct mutex lock;
>> bool active;
>> u32 caps;
>>  };
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
>> b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 2cd2dab..ff0b178 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -833,6 +833,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt 
>> *ppgtt, bool create)
>> enum vgt_g2v_type msg;
>> int i;
>>  
>> +   mutex_lock(_priv->vgpu.lock);
>> +
>> if (create)
>> atomic_inc(px_used(ppgtt->pd)); /* never remove */
>> else
>> @@ -860,6 +862,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt 
>> *ppgtt, bool create)
>>  
>> I915_WRITE(vgtif_reg(g2v_notify), msg);
>>  
> How do you know the operation is complete and it is now safe to
> overwrite the data regs?
> -Chris
>
by design, the data reg value is copied out to use, so as long as the
action and data are operated together, how long the operation takes is not an
issue and it is safe to overwrite the data regs with a new action next time.

-BRs, Xiaolin

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v8 2/9] drm/i915: vgpu shared memory setup for pv optimization

2019-07-23 Thread Zhang, Xiaolin
On 07/23/2019 06:37 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-07-23 12:31:57)
>> To enable vgpu pv features, we need to setup a shared memory page
>> which will be used for data exchange directly accessed between both
>> guest and backend i915 driver to avoid emulation trap cost.
>>
>> guest i915 will allocate this page memory and then pass it's physical
>> address to backend i915 driver through PVINFO register so that backend i915
>> driver can access this shared page memory without any trap cost with the
>> help from hypervisor's read guest gpa functionality.
>>
>> guest i915 will send VGT_G2V_SHARED_PAGE_SETUP notification to host GVT
>> once shared memory setup finished.
>>
>> the layout of the shared_page also defined as well in this patch which
>> is used for pv features implementation.
>>
>> v0: RFC.
>> v1: addressed RFC comment to move both shared_page_lock and shared_page
>> to i915_virtual_gpu structure.
>> v2: packed i915_virtual_gpu structure.
>> v3: added SHARED_PAGE_SETUP g2v notification for pv shared_page setup
>> v4: added intel_vgpu_setup_shared_page() in i915_vgpu_pv.c.
>> v5: per engine desc data in shared memory.
>> v6: added version support in shared memory (Zhenyu).
>> v7: rebase.
>>
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h|  4 +-
>>  drivers/gpu/drm/i915/i915_pvinfo.h |  5 ++-
>>  drivers/gpu/drm/i915/i915_vgpu.c   | 85 
>> ++
>>  drivers/gpu/drm/i915/i915_vgpu.h   | 17 
>>  4 files changed, 109 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index fa5dc47..48cf141 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1080,7 +1080,9 @@ struct i915_virtual_gpu {
>> bool active;
>> u32 caps;
>> u32 pv_caps;
>> -};
>> +
>> +   struct i915_virtual_gpu_pv *pv;
>> +} __packed;
>>  
>>  /* used in computing the new watermarks state */
>>  struct intel_wm_config {
>> diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h 
>> b/drivers/gpu/drm/i915/i915_pvinfo.h
>> index ad398b4..3c63603 100644
>> --- a/drivers/gpu/drm/i915/i915_pvinfo.h
>> +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
>> @@ -48,6 +48,7 @@ enum vgt_g2v_type {
>> VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY,
>> VGT_G2V_EXECLIST_CONTEXT_CREATE,
>> VGT_G2V_EXECLIST_CONTEXT_DESTROY,
>> +   VGT_G2V_SHARED_PAGE_SETUP,
>> VGT_G2V_MAX,
>>  };
>>  
>> @@ -112,7 +113,9 @@ struct vgt_if {
>>  
>> u32 pv_caps;
>>  
>> -   u32  rsv7[0x200 - 25];/* pad to one page */
>> +   u64 shared_page_gpa;
>> +
>> +   u32  rsv7[0x200 - 27];/* pad to one page */
>>  } __packed;
>>  
>>  #define vgtif_offset(x) (offsetof(struct vgt_if, x))
>> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c 
>> b/drivers/gpu/drm/i915/i915_vgpu.c
>> index 9b37dd1..998cf6a 100644
>> --- a/drivers/gpu/drm/i915/i915_vgpu.c
>> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
>> @@ -145,6 +145,7 @@ static void vgt_deballoon_space(struct i915_ggtt *ggtt,
>>   */
>>  void intel_vgt_deballoon(struct i915_ggtt *ggtt)
> This addendum is nothing to do with the i915_ggtt cleanup.
>
>>  {
>> +   struct drm_i915_private *i915 = ggtt->vm.i915;
>> int i;
>>  
>> if (!intel_vgpu_active(ggtt->vm.i915))
>> @@ -154,6 +155,9 @@ void intel_vgt_deballoon(struct i915_ggtt *ggtt)
>>  
>> for (i = 0; i < 4; i++)
>> vgt_deballoon_space(ggtt, _info.space[i]);
>> +
>> +   if (i915->vgpu.pv)
>> +   free_page((unsigned long)i915->vgpu.pv->shared_page);
> __free_page(shared_page);
Thanks, will change this and move to another place in next revision.
-BRs, Xiaolin
>
>>  }
>>  
>>  static int vgt_balloon_space(struct i915_ggtt *ggtt,
>> @@ -309,6 +313,81 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
>>   * i915 vgpu PV support for Linux
>>   */
>>  
>> +/*
>> + * shared_page setup for VGPU PV features
>> + */
>> +static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
>> +   void __iomem *shared_area)
>> +{
>> +   void __iomem *addr;
>> +   struct i915_virtual_gpu_pv *pv;
>> +   struct gvt_shared_page *base;
>> +   u64 gpa;
>> +   u16 ver_maj, ver_min;
>> +
>> +   /* We allocate 1 page shared between guest and GVT for data exchange.
>> +*   ___.
>> +*  |head   |   |
>> +*  |___|.. PAGE/8
>> +*  |PV ELSP|
>> +*  :___PAGE/4
>> +*  |desc (SEND)|
>> +*  |   |
>> +*  :___PAGE/2
>> +*  |cmds (SEND)|
>> +*  |   |
>> +*  |   |
>> +  

Re: [Intel-gfx] [PATCH v8 1/9] drm/i915: introduced vgpu pv capability

2019-07-23 Thread Zhang, Xiaolin
On 07/23/2019 05:30 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-07-23 12:31:56)
>> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c 
>> b/drivers/gpu/drm/i915/i915_vgpu.c
>> index dbd1fa3..9b37dd1 100644
>> --- a/drivers/gpu/drm/i915/i915_vgpu.c
>> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
>> @@ -95,7 +95,14 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
>> dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps));
>>  
>> dev_priv->vgpu.active = true;
>> -   DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
>> +
>> +   if (!intel_vgpu_check_pv_caps(dev_priv, shared_area)) {
>> +   DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
>> +   return;
>> +   }
>> +
>> +   DRM_INFO("Virtual GPU for Intel GVT-g detected with pv_caps 0x%x.\n",
>> +   dev_priv->vgpu.pv_caps);
> This is a user-facing message, avoid using any jargon. Instead of
> pv_caps:%x, expand it to a set of strings if you think it's important
> for the user to know. It's probably not! But you probably want to
> include the caps in debugfs/i915_capabilities.
> -Chris
>
Chris, thanks for your time to review and it is a good point.
I will polish the message like this "xxx GVT-g detected with PV
Optimizations."
and will include the pv caps in debugfs/i915_capabilities. do you think
is it better?
-BRs, Xiaolin
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v7 5/9] drm/i915: vgpu context submission pv optimization

2019-07-22 Thread Zhang, Xiaolin
On 07/08/2019 06:41 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-07-08 02:35:18)
>> +static void pv_submit(struct intel_engine_cs *engine,
>> +  struct i915_request **out,
>> +  struct i915_request **end)
>> +{
>> +   struct intel_engine_execlists * const execlists = >execlists;
>> +   struct i915_virtual_gpu_pv *pv = engine->i915->vgpu.pv;
>> +   struct pv_submission *pv_elsp = pv->pv_elsp[engine->hw_id];
>> +   struct i915_request *rq;
>> +
>> +   u64 descs[2];
>> +   int n, err;
>> +
>> +   memset(descs, 0, sizeof(descs));
>> +   n = 0;
>> +   do {
>> +   rq = *out++;
>> +   descs[n++] = execlists_update_context(rq);
>> +   } while (out != end);
>> +
>> +   for (n = 0; n < execlists_num_ports(execlists); n++)
>> +   pv_elsp->descs[n] = descs[n];
> You can polish this a bit, minor nit.
Sure.
>
>> +   writel(PV_ACTION_ELSP_SUBMISSION, execlists->submit_reg);
>> +
>> +#define done (READ_ONCE(pv_elsp->submitted) == true)
>> +   err = wait_for_us(done, 1);
>> +   if (err)
>> +   err = wait_for(done, 1);
> Strictly, you need to use wait_for_atomic_us() [under a spinlock here],
> and there's no need for a second pass since you are not allowed to sleep
> anyway. So just set the timeout to 1000us.
Sure.
>> +#undef done
>> +
>> +   if (unlikely(err))
>> +   DRM_ERROR("PV (%s) workload submission failed\n", 
>> engine->name);
>> +
>> +   pv_elsp->submitted = false;
> However, that looks solid wrt to serialisation of this engine with its
> pv host, without cross-interference (at least in the comms channel).
>
> If you want to get fancy, you should be able to simply not dequeue until
> !pv_elsp->submitted so the wait-for-ack occurs naturally. So long as the
> pv host plays nicely, we should always see submitted acked before the
> request is signaled. Give or take problems with preemption and the pv
> host being a black box that may allow requests to complete and so our
> submission be a no-op (and so not generate an interrupt to allow further
> submission). Indeed, I would strongly recommend you use the delayed ack
> plus jiffie timer to avoid the no-op submission problem.
I will implement this suggestion. thanks your feedback, Chris.
-BRs, Xiaolin
> If you want to prove this in a bunch of mocked up selftests that provide
> the pv channel ontop of the local driver
> -Chris
> ___
> intel-gvt-dev mailing list
> intel-gvt-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/execlists: Disable preemption under GVT

2019-07-10 Thread Zhang, Xiaolin
On 07/09/2019 05:12 PM, Chris Wilson wrote:
> Preempt-to-busy uses a GPU semaphore to enforce an idle-barrier across
> preemption, but mediated gvt does not fully support semaphores.
>
> v2: Fiddle around with the flags and settle on using has-semaphores for
> the core bits so that we retain the ability to preempt our own
> semaphores.
Chris,
With this patch, vgpu guest can boot up successfully with BAT test passed.
But I want to point out that another GPU hang issue popped up after the vgpu
guest boot issue was addressed. I am not quite sure whether it is related or not. 
Basically it is easy to trigger with glxears with vblank_mode 0 and the
GPU hang time is random and the call trace is below: (guest kernel log
is attached in case it is useful.).
[ 1192.680497] Asynchronous wait on fence i915:compiz[1866]:b30 timed
out (hint:intel_atomic_commit_ready+0x0/0x50 [i915])
[ 1193.512989] hangcheck rcs0
[ 1193.513650] hangcheck Awake? 4
[ 1193.514299] hangcheck Hangcheck: 9986 ms ago
[ 1193.515071] hangcheck Reset count: 0 (global 0)
[ 1193.515854] hangcheck Requests:
[ 1193.516410] hangcheck RING_START: 0x
[ 1193.517138] hangcheck RING_HEAD:  0x3198
[ 1193.517876] hangcheck RING_TAIL:  0x3198
[ 1193.518611] hangcheck RING_CTL:   0x
[ 1193.519380] hangcheck RING_MODE:  0x0200 [idle]
[ 1193.520149] hangcheck RING_IMR: fefe
[ 1193.520799] hangcheck ACTHD:  0x_000a6650
[ 1193.521545] hangcheck BBADDR: 0x_
[ 1193.522321] hangcheck DMA_FADDR: 0x_
[ 1193.523392] hangcheck IPEIR: 0x
[ 1193.524171] hangcheck IPEHR: 0x
[ 1193.525050] hangcheck Execlist status: 0x00040012 0003, entries 6
[ 1193.526049] hangcheck Execlist CSB read 5, write 5, tasklet
queued? no (enabled)
[ 1193.527154] hangcheck Active[0: ring:{start:dff03000,
hwsp:dff661c0, seqno:00012175}, rq:  1b:12178-  prio=4097 @ 11649ms:
glxgears[2160]
[ 1193.528852] hangcheck Pending[0] ring:{start:dff03000,
hwsp:dff661c0, seqno:00012175}, rq:  1b:12178-  prio=4097 @ 11649ms:
glxgears[2160]
[ 1193.532515] hangcheck Pending[1] ring:{start:dff39000,
hwsp:dff66140, seqno:004f7b5e}, rq:  14:4f7b60  prio=4097 @ 11655ms:
Xorg[865]
[ 1193.536009] hangcheck E  1b:12178-  prio=4097 @ 11658ms:
glxgears[2160]
[ 1193.537187] hangcheck E  14:4f7b60  prio=4097 @ 11658ms:
Xorg[865]
[ 1193.538192] hangcheck Queue priority hint: 4097
[ 1193.538894] hangcheck Q  1a:b30-  prio=4097 @ 11650ms:
compiz[1866]
[ 1193.539810] hangcheck Q  1b:1217a  prio=2 @ 11660ms:
glxgears[2160]
[ 1193.542485] hangcheck HWSP:
[ 1193.543703] hangcheck []    
   
[ 1193.546729] hangcheck *
[ 1193.547230] hangcheck [0040] 0014 0003 8002 0001
0014 0001 0018 0003
[ 1193.550607] hangcheck [0060] 0001  0014 0001
   0005
[ 1193.552274] hangcheck [0080]    
   
[ 1193.553937] hangcheck *
[ 1193.554381] hangcheck Idle? no
[ 1193.554902] hangcheck Signals:
[ 1193.555419] hangcheck [1b:12178] @ 11678ms
[ 1193.864797] i915 :00:04.0: GPU HANG: ecode 9:0:0x, hang
on rcs0
[ 1193.869234] [drm] GPU hangs can indicate a bug anywhere in the entire
gfx stack, including userspace.
[ 1193.871096] [drm] Please file a _new_ bug report on
bugs.freedesktop.org against DRI -> DRM/Intel
[ 1193.872483] [drm] drm/i915 developers can then reassign to the right
component if it's not a kernel issue.
[ 1193.873927] [drm] The gpu crash dump is required to analyze gpu
hangs, so please always attach it.
[ 1193.875395] [drm] GPU crash dump saved to /sys/class/drm/card0/error

BRs, Xiaolin
>
> Signed-off-by: Chris Wilson 
> Cc: Zhenyu Wang 
> Cc: Xiaolin Zhang 
> Cc: Tvrtko Ursulin 
> Cc: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 ++--
>  drivers/gpu/drm/i915/gt/intel_lrc.c   | 24 +--
>  drivers/gpu/drm/i915/gt/selftest_lrc.c|  6 ++
>  3 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 56310812da21..614ed8c488ef 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -825,6 +825,8 @@ int intel_engine_init_common(struct intel_engine_cs 
> *engine)
>   struct drm_i915_private *i915 = engine->i915;
>   int ret;
>  
> + engine->set_default_submission(engine);
> +
>   /* We may need to do things with the shrinker which
>* require us to immediately switch back to the default
>* context. This can cause a problem as pinning the
> @@ -852,8 +854,6 @@ int intel_engine_init_common(struct intel_engine_cs 
> *engine)
>  
>   engine->emit_fini_breadcrumb_dw = ret;
>  

Re: [Intel-gfx] [PATCH v5] drm/i915/execlists: Disable preemption under GVT

2019-06-24 Thread Zhang, Xiaolin
On 06/24/2019 05:48 PM, Chris Wilson wrote:
> Preempt-to-busy uses a GPU semaphore to enforce an idle-barrier across
> preemption, but mediated gvt does not fully support semaphores.
>
> Signed-off-by: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 ++--
>  drivers/gpu/drm/i915/gt/intel_lrc.c   | 21 -
>  drivers/gpu/drm/i915/gt/selftest_lrc.c|  3 +++
>  3 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4961f74fd902..d6fbc47727c1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -830,6 +830,8 @@ int intel_engine_init_common(struct intel_engine_cs 
> *engine)
>   struct drm_i915_private *i915 = engine->i915;
>   int ret;
>  
> + engine->set_default_submission(engine);
> +
>   /* We may need to do things with the shrinker which
>* require us to immediately switch back to the default
>* context. This can cause a problem as pinning the
> @@ -857,8 +859,6 @@ int intel_engine_init_common(struct intel_engine_cs 
> *engine)
>  
>   engine->emit_fini_breadcrumb_dw = ret;
>  
> - engine->set_default_submission(engine);
> -
>   return 0;
>  
>  err_unpin:
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c8a0c9b32764..8017efb36f7b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -294,6 +294,9 @@ static inline bool need_preempt(const struct 
> intel_engine_cs *engine,
>  {
>   int last_prio;
>  
> + if (!intel_engine_has_preemption(engine))
> + return false;
> +
>   /*
>* Check if the current priority hint merits a preemption attempt.
>*
> @@ -890,6 +893,9 @@ need_timeslice(struct intel_engine_cs *engine, const 
> struct i915_request *rq)
>  {
>   int hint;
>  
> + if (!intel_engine_has_preemption(engine))
> + return false;
> +
>   if (list_is_last(>sched.link, >active.requests))
>   return false;
>  
> @@ -2600,7 +2606,8 @@ static u32 *gen8_emit_fini_breadcrumb(struct 
> i915_request *request, u32 *cs)
>   *cs++ = MI_USER_INTERRUPT;
>  
>   *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> - cs = emit_preempt_busywait(request, cs);
> + if (intel_engine_has_preemption(request->engine))
> + cs = emit_preempt_busywait(request, cs);
>  
>   request->tail = intel_ring_offset(request, cs);
>   assert_ring_tail_valid(request->ring, request->tail);
> @@ -2624,7 +2631,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   *cs++ = MI_USER_INTERRUPT;
>  
>   *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> - cs = emit_preempt_busywait(request, cs);
> + if (intel_engine_has_preemption(request->engine))
> + cs = emit_preempt_busywait(request, cs);
>  
>   request->tail = intel_ring_offset(request, cs);
>   assert_ring_tail_valid(request->ring, request->tail);
> @@ -2672,10 +2680,11 @@ void intel_execlists_set_default_submission(struct 
> intel_engine_cs *engine)
>   engine->unpark = NULL;
>  
>   engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> - if (!intel_vgpu_active(engine->i915))
> + if (!intel_vgpu_active(engine->i915)) {
>   engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> - engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> + engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> + }
>  }
>  
>  static void execlists_destroy(struct intel_engine_cs *engine)
> @@ -3463,6 +3472,8 @@ intel_execlists_create_virtual(struct i915_gem_context 
> *ctx,
>   ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
>   ve->base.emit_fini_breadcrumb_dw =
>   sibling->emit_fini_breadcrumb_dw;
> +
> + ve->base.flags = sibling->flags;
>   }
>  
>   return >context;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
> b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 0c97f953e908..b99b589a6c81 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -270,6 +270,9 @@ static int live_timeslice_preempt(void *arg)
>   enum intel_engine_id id;
>  
>   for_each_engine(engine, i915, id) {
> + if (!intel_engine_has_preemption(engine))
> + continue;
> +
>   memset(vaddr, 0, PAGE_SIZE);
>  
>   err = slice_semaphore_queue(engine, vma, count);

Tested-by:  Xiaolin Zhang 

with this patch, linux guest can boot up successfully with sanity test passed. 
But it is easy to get the GPU hang when running "vblank_mode=0 glxgears" in 

Re: [Intel-gfx] [PATCH v6 3/8] drm/i915: vgpu ppgtt update pv optimization

2019-06-10 Thread Zhang, Xiaolin
On 06/10/2019 03:44 PM, Chris Wilson wrote:
> Quoting Zhang, Xiaolin (2019-06-10 02:32:18)
>> On 06/04/2019 05:01 PM, Chris Wilson wrote:
>>> Quoting Xiaolin Zhang (2019-06-03 07:02:44)
>>>> +static void gen8_ppgtt_clear_4lvl_pv(struct i915_address_space *vm,
>>>> + u64 start, u64 length)
>>>> +{
>>>> +   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>>> +   struct i915_pml4 *pml4 = >pml4;
>>>> +   struct drm_i915_private *dev_priv = vm->i915;
>>>> +   struct pv_ppgtt_update *pv_ppgtt =
>>>> +   _priv->vgpu.shared_page->buf.pv_ppgtt;
>>>> +   u64 orig_start = start;
>>>> +   u64 orig_length = length;
>>>> +
>>>> +   gen8_ppgtt_clear_4lvl(vm, start, length);
>>>> +
>>>> +   pv_ppgtt->pdp = px_dma(pml4);
>>>> +   pv_ppgtt->start = orig_start;
>>>> +   pv_ppgtt->length = orig_length;
>>>> +   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_CLEAR);
>>>> +}
>>>> +
>>>> +static void gen8_ppgtt_insert_4lvl_pv(struct i915_address_space *vm,
>>>> +  struct i915_vma *vma,
>>>> +  enum i915_cache_level cache_level,
>>>> +  u32 flags)
>>>> +{
>>>> +   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>>> +   struct drm_i915_private *dev_priv = vm->i915;
>>>> +   struct pv_ppgtt_update *pv_ppgtt =
>>>> +   _priv->vgpu.shared_page->buf.pv_ppgtt;
>>>> +
>>>> +   gen8_ppgtt_insert_4lvl(vm, vma, cache_level, flags);
>>>> +
>>>> +   pv_ppgtt->pdp = px_dma(>pml4);
>>>> +   pv_ppgtt->start = vma->node.start;
>>>> +   pv_ppgtt->length = vma->node.size;
>>>> +   pv_ppgtt->cache_level = cache_level;
>>>> +   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_INSERT);
>>>> +}
>>> For this to work, a vgpu mmio write must be trapped and suspend the
>>> client while the host processes the trap. Otherwise, we would be
>>> overwriting the side-channel before the host processes the command.
>>>
>>> That sounds horrible.
>>> -Chris
>> Chris, thanks for your comment. Do you think the spin_lock protecting
>> this VGPU MMIO write is enough to eliminate the side-channel effect?
> I would suggest you consider using a pair of command/response rings.
> -Chris
>
Chris, thanks for your suggestion and prompt response. I will rework them
in that direction.
BRs, Xiaolin


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v6 3/8] drm/i915: vgpu ppgtt update pv optimization

2019-06-09 Thread Zhang, Xiaolin
On 06/04/2019 05:01 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-06-03 07:02:44)
>> +static void gen8_ppgtt_clear_4lvl_pv(struct i915_address_space *vm,
>> + u64 start, u64 length)
>> +{
>> +   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>> +   struct i915_pml4 *pml4 = >pml4;
>> +   struct drm_i915_private *dev_priv = vm->i915;
>> +   struct pv_ppgtt_update *pv_ppgtt =
>> +   _priv->vgpu.shared_page->buf.pv_ppgtt;
>> +   u64 orig_start = start;
>> +   u64 orig_length = length;
>> +
>> +   gen8_ppgtt_clear_4lvl(vm, start, length);
>> +
>> +   pv_ppgtt->pdp = px_dma(pml4);
>> +   pv_ppgtt->start = orig_start;
>> +   pv_ppgtt->length = orig_length;
>> +   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_CLEAR);
>> +}
>> +
>> +static void gen8_ppgtt_insert_4lvl_pv(struct i915_address_space *vm,
>> +  struct i915_vma *vma,
>> +  enum i915_cache_level cache_level,
>> +  u32 flags)
>> +{
>> +   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>> +   struct drm_i915_private *dev_priv = vm->i915;
>> +   struct pv_ppgtt_update *pv_ppgtt =
>> +   _priv->vgpu.shared_page->buf.pv_ppgtt;
>> +
>> +   gen8_ppgtt_insert_4lvl(vm, vma, cache_level, flags);
>> +
>> +   pv_ppgtt->pdp = px_dma(>pml4);
>> +   pv_ppgtt->start = vma->node.start;
>> +   pv_ppgtt->length = vma->node.size;
>> +   pv_ppgtt->cache_level = cache_level;
>> +   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_INSERT);
>> +}
> For this to work, a vgpu mmio write must be trapped and suspend the
> client while the host processes the trap. Otherwise, we would be
> overwriting the side-channel before the host processes the command.
>
> That sounds horrible.
> -Chris
Chris, thanks for your comment. Do you think the spin_lock protecting
this VGPU MMIO write is enough to eliminate the side-channel effect?
Xiaolin


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v5 4/8] drm/i915: vgpu context submission pv optimization

2019-05-21 Thread Zhang, Xiaolin
On 04/29/2019 06:03 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-04-29 04:10:54)
>> +static void pv_submit(struct intel_engine_cs *engine)
>> +{
>> +   struct intel_engine_execlists * const execlists = >execlists;
>> +   struct execlist_port *port = execlists->port;
>> +   unsigned int n;
>> +   struct gvt_shared_page *shared_page = engine->i915->vgpu.shared_page;
>> +   u64 descs[2];
>> +
>> +   for (n = 0; n < execlists_num_ports(execlists); n++) {
>> +   struct i915_request *rq;
>> +   unsigned int count = 0;
>> +
>> +   descs[n] = 0;
>> +   rq = port_unpack([n], );
>> +   if (rq && count == 0) {
>> +   port_set([n], port_pack(rq, ++count));
>> +   descs[n] = execlists_update_context(rq);
>> +   }
>> +   }
>> +
>> +   spin_lock(>i915->vgpu.shared_page_lock[engine->id]);
> Only one engine at a time now accesses that portion of pv_elsp, so the
> spin lock is not required per-se, aiui.
>
> The problem is that there is no coordination between pv_submit and the
> other side of the pipe, as far as I can see. So it seems very possible
> for us to overwrite entries before they have been read. I expect to see
> some ack mechanism for VGT_G2V_PV_SUBMISSION.
>
>> +   for (n = 0; n < execlists_num_ports(execlists); n++)
>> +   shared_page->pv_elsp[engine->id].descs[n] = descs[n];
> I'd recommend not using engine->id, that's just our internal id and may
> vary between host/guest. Use engine->hw_id? Or negotiate using pv setup
> and store pv_id?
>
> But starting to look more solid over all. We just need to finish
> splitting out the various similar-but-quite-different-really submission
> backends. :)
> -Chris
>
Chris, thanks for your great comments; they will be addressed in the next version
(I will remove the spin lock, use hw_id, add an ack mechanism and refine the patches).
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 0/8] i915 vgpu PV to improve vgpu performance

2019-04-10 Thread Zhang, Xiaolin
On 03/30/2019 12:05 AM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:36)
>> To improve vgpu performance, it could implement some PV optimization
>> such as to reduce the mmio access trap numbers or eliminate certain piece
>> of HW emulation within guest driver to reduce vm exit/vm enter cost.
> Where's the CI for this patchset? The lack of interrupts to drive
> submission should have shown up, and if not, we need some testcases to
> make sure it doesn't happen again. Everytime I see a gvt patch, I ask if
> we can get some coverage in intel-gfx-ci :)
> -Chris
>
The CI for this patchset was not generated due to a build failure. I will
try to avoid the build issue in the next version by rebasing to the tip of the branch.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-04-10 Thread Zhang, Xiaolin
On 03/30/2019 03:14 AM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> +   spin_lock(>i915->vgpu.shared_page_lock);
>> +   shared_page->ring_id = engine->id;
>> +   for (n = 0; n < execlists_num_ports(execlists); n++)
>> +   shared_page->descs[n] = descs[n];
>> +
>> +   __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
>> +   VGT_G2V_PV_SUBMISSION);
> There's no serialisation with the consumer here. The
> shared_page->descs[] will be clobbered by a second engine before they
> are read by pv. There should be a wait-for-ack here, or separate
> channels for each engine. In execlists, we avoid this by not dequeuing
> until after we get a response/ack from HW.
>
> Preemption? I guess preemption was turned off, or else you have to worry
> about the same engine overwriting the desc[] (if no ack).
> -Chris
>
Chris, thank you for pointing out my flaw here. I will use the per-engine
channel for notification. Preemption is turned off and not supported in
the current version.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915: vgpu context submission pv optimization

2019-04-10 Thread Zhang, Xiaolin
On 03/29/2019 11:40 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c 
>> b/drivers/gpu/drm/i915/i915_irq.c
>> index 2f78829..28e8ee0 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -37,6 +37,7 @@
>>  #include "i915_drv.h"
>>  #include "i915_trace.h"
>>  #include "intel_drv.h"
>> +#include "i915_vgpu.h"
>>  
>>  /**
>>   * DOC: interrupt handling
>> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, 
>> u32 iir)
>> if (iir & GT_RENDER_USER_INTERRUPT) {
>> intel_engine_breadcrumbs_irq(engine);
>> tasklet |= USES_GUC_SUBMISSION(engine->i915);
>> +   tasklet |= USES_PV_SUBMISSION(engine->i915);
> We should move this to an engine->flag.
sure, I will remove this next version.
>
>> }
>>  
>> if (tasklet)
>> +static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
>> +{
>> +   /*
>> +* We inherit a bunch of functions from execlists that we'd like
>> +* to keep using:
>> +*
>> +*engine->submit_request = execlists_submit_request;
>> +*engine->cancel_requests = execlists_cancel_requests;
>> +*engine->schedule = execlists_schedule;
>> +*
>> +* But we need to override the actual submission backend in order
>> +* to talk to the GVT with PV notification message.
>> +*/
>> +   intel_execlists_set_default_submission(engine);
>> +
>> +   engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
> You need to pin the breadcrumbs irq, or it will not fire on every
> request.
Indeed, I should. Thanks for your great point.
> I'd push for this to live in intel_pv_submission.c or
> intel_vgpu_submission.c
This will create a new file and require a change in the Makefile. If this kind
of change is OK, I will create one named intel_pv_submission.c.
>> @@ -401,6 +551,12 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private 
>> *dev_priv,
>> ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
>> ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
>> }
>> +
>> +   if (cap == PV_SUBMISSION) {
>> +   engine = (struct intel_engine_cs *)data;
>> +   engine->set_default_submission = 
>> vgpu_pv_set_default_submission;
>> +   engine->set_default_submission(engine);
>> +   }
>>  }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index c1b9780..0a66714 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2352,6 +2352,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
>> *engine)
>>  */
>> }
>> engine->emit_bb_start = gen8_emit_bb_start;
>> +
>> +   if (intel_vgpu_active(engine->i915))
>> +   intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, 
>> engine);
> That pair is ugly. Should clean up the engine initialisation so that it
> doesn't involve placing a chunk of code in a foreign class.
> -Chris
>
Yep, will move it to other location.


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 5/5] drm/i915: ppgtt update pvmmio optimization

2018-10-31 Thread Zhang, Xiaolin
Ping review, thanks very much.

BRs, Xiaolin

-Original Message-
From: Zhang, Xiaolin
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
; Zhenyu Wang ; Wang, Zhi A 
; Chris Wilson ; Joonas 
Lahtinen ; He; He, Min ; 
Jiang; Jiang, Fei ; Gong; Gong, Zhipeng 
; Yuan; Yuan, Hang ; Lv, Zhiyuan 

Subject: [PATCH v2 5/5] drm/i915: ppgtt update pvmmio optimization

This patch extends g2v notification to notify host GVT-g of ppgtt updates from
the guest, including alloc_4lvl, clear_4lvl and insert_4lvl. It uses the shared page to
pass the additional params.
This patch also adds one new pvmmio level to control ppgtt updates.

Use PVMMIO_PPGTT_UPDATE to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv callbacks for vm.{allocate_va_range, insert_entries, clear_range} 
within ppgtt.

Cc: Zhenyu Wang 
Cc: Zhi Wang 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: He, Min 
Cc: Jiang, Fei 
Cc: Gong, Zhipeng 
Cc: Yuan, Hang 
Cc: Zhiyuan Lv 
Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +
 drivers/gpu/drm/i915/i915_pvinfo.h  |  3 ++
 drivers/gpu/drm/i915/i915_vgpu.c|  3 +-
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 98d9a1e..b529f53 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -956,6 +956,25 @@ static void gen8_ppgtt_clear_4lvl(struct 
i915_address_space *vm,
}
 }

+static void gen8_ppgtt_clear_4lvl_pv(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+   struct i915_pml4 *pml4 = >pml4;
+   struct drm_i915_private *dev_priv = vm->i915;
+   struct pv_ppgtt_update *pv_ppgtt =
+   _priv->vgpu.shared_page->pv_ppgtt;
+   u64 orig_start = start;
+   u64 orig_length = length;
+
+   gen8_ppgtt_clear_4lvl(vm, start, length);
+
+   pv_ppgtt->pdp = px_dma(pml4);
+   pv_ppgtt->start = orig_start;
+   pv_ppgtt->length = orig_length;
+   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_CLEAR); }
+
 static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
@@ -1197,6 +1216,25 @@ static void gen8_ppgtt_insert_4lvl(struct 
i915_address_space *vm,
}
 }

+static void gen8_ppgtt_insert_4lvl_pv(struct i915_address_space *vm,
+  struct i915_vma *vma,
+  enum i915_cache_level cache_level,
+  u32 flags)
+{
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+   struct drm_i915_private *dev_priv = vm->i915;
+   struct pv_ppgtt_update *pv_ppgtt =
+   _priv->vgpu.shared_page->pv_ppgtt;
+
+   gen8_ppgtt_insert_4lvl(vm, vma, cache_level, flags);
+
+   pv_ppgtt->pdp = px_dma(>pml4);
+   pv_ppgtt->start = vma->node.start;
+   pv_ppgtt->length = vma->node.size;
+   pv_ppgtt->cache_level = cache_level;
+   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_INSERT); }
+
 static void gen8_free_page_tables(struct i915_address_space *vm,
  struct i915_page_directory *pd)
 {
@@ -1466,6 +1504,30 @@ static int gen8_ppgtt_alloc_4lvl(struct 
i915_address_space *vm,
return -ENOMEM;
 }

+static int gen8_ppgtt_alloc_4lvl_pv(struct i915_address_space *vm,
+u64 start, u64 length)
+{
+   struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+   struct i915_pml4 *pml4 = >pml4;
+   struct drm_i915_private *dev_priv = vm->i915;
+   struct pv_ppgtt_update *pv_ppgtt =
+   _priv->vgpu.shared_page->pv_ppgtt;
+   int ret;
+   u64 orig_start = start;
+   u64 orig_length = length;
+
+   ret = gen8_ppgtt_alloc_4lvl(vm, start, length);
+   if (ret)
+   return ret;
+
+   pv_ppgtt->pdp = px_dma(pml4);
+   pv_ppgtt->start = orig_start;
+   pv_ppgtt->length = orig_length;
+   I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PPGTT_L4_ALLOC);
+
+   return 0;
+}
+
 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
  struct i915_page_directory_pointer *pdp,
  u64 start, u64 length,
@@ -1631,6 +1693,11 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct 
drm_i915_private *i915)
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
+   if (PVMMIO_LEVEL_ENABLE(i915, PVMMIO_PPGTT_UPDATE)) {
+   ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4

Re: [Intel-gfx] [PATCH v2 2/5] drm/i915: get ready of memory for pvmmio

2018-10-31 Thread Zhang, Xiaolin
Ping review. Thanks very much. 

BRs, Xiaolin 

-Original Message-
From: Zhang, Xiaolin 
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
; Zhenyu Wang ; Wang, Zhi A 
; Chris Wilson ; Joonas 
Lahtinen ; He; He, Min ; 
Jiang; Jiang, Fei ; Gong; Gong, Zhipeng 
; Yuan; Yuan, Hang ; Lv, Zhiyuan 

Subject: [PATCH v2 2/5] drm/i915: get ready of memory for pvmmio

To enable the pvmmio feature, we need to prepare one 4K shared page which will be
accessed by both the guest and the backend i915 driver.

The guest i915 allocates one page of memory and then the guest physical address is
passed to the backend i915 driver through a PVINFO register so that the backend i915
driver can access this shared page without hypervisor trap cost for shared data
exchange via the hypervisor read_gpa functionality.

v0: RFC
v1: addressed RFC comment to move both shared_page_lock and shared_page to 
i915_virtual_gpu structure
v2: packed i915_virtual_gpu structure

Cc: Zhenyu Wang 
Cc: Zhi Wang 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: He, Min 
Cc: Jiang, Fei 
Cc: Gong, Zhipeng 
Cc: Yuan, Hang 
Cc: Zhiyuan Lv 
Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_drv.c|  2 ++
 drivers/gpu/drm/i915/i915_drv.h|  4 +++-
 drivers/gpu/drm/i915/i915_pvinfo.h | 24 +++-
 drivers/gpu/drm/i915/i915_vgpu.c   | 24 +++-
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c 
index baac35f..557ab67 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -987,6 +987,8 @@ static void i915_mmio_cleanup(struct drm_i915_private 
*dev_priv)
 
intel_teardown_mchbar(dev_priv);
pci_iounmap(pdev, dev_priv->regs);
+   if (intel_vgpu_active(dev_priv) && dev_priv->vgpu.shared_page)
+   free_page((unsigned long)dev_priv->vgpu.shared_page);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h 
index 7b2d7cb..d7a972f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1345,7 +1345,9 @@ struct i915_virtual_gpu {
bool active;
u32 caps;
u32 pv_caps;
-};
+   spinlock_t shared_page_lock;
+   struct gvt_shared_page *shared_page;
+} __packed;
 
 /* used in computing the new watermarks state */  struct intel_wm_config { 
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h 
b/drivers/gpu/drm/i915/i915_pvinfo.h
index 26709e8..179d558 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,6 +49,24 @@ enum vgt_g2v_type {
VGT_G2V_MAX,
 };
 
+struct pv_ppgtt_update {
+   u64 pdp;
+   u64 start;
+   u64 length;
+   u32 cache_level;
+};
+
+/*
+ * shared page(4KB) between gvt and VM, could be allocated by guest 
+driver
+ * or a fixed location in PCI bar 0 region  */ struct gvt_shared_page {
+   u32 elsp_data[4];
+   u32 reg_addr;
+   u32 disable_irq;
+   struct pv_ppgtt_update pv_ppgtt;
+};
+
 #define VGPU_PVMMIO(vgpu) vgpu_vreg_t(vgpu, vgtif_reg(enable_pvmmio))
 
 /*
@@ -121,8 +139,12 @@ struct vgt_if {
u32 execlist_context_descriptor_lo;
u32 execlist_context_descriptor_hi;
u32 enable_pvmmio;
+   struct {
+   u32 lo;
+   u32 hi;
+   } shared_page_gpa;
 
-   u32  rsv7[0x200 - 25];/* pad to one page */
+   u32  rsv7[0x200 - 27];/* pad to one page */
 } __packed;
 
 #define vgtif_reg(x) \
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 907bbd2..cb409d5 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -62,6 +62,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)  {
u64 magic;
u16 version_major;
+   u64 gpa;
 
BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
@@ -91,7 +92,28 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
dev_priv->vgpu.pv_caps);
dev_priv->vgpu.pv_caps = __raw_i915_read32(dev_priv,
vgtif_reg(enable_pvmmio));
-
+   if (intel_vgpu_active(dev_priv) && dev_priv->vgpu.pv_caps) {
+   dev_priv->vgpu.shared_page =  (struct gvt_shared_page *)
+   get_zeroed_page(GFP_KERNEL);
+   if (!dev_priv->vgpu.shared_page) {
+   DRM_ERROR("out of memory for shared page memory\n");
+   return;
+   }
+   gpa = __pa(dev_priv->vgpu.shared_page);
+   __raw_i915_write32(dev_priv, vgtif_reg(shared_page_gpa.lo),
+   lower_32_bits(gpa));
+   __raw_i915_write32(dev_priv, vgtif_reg(shared_page_gpa.hi),
+   upper_32_bits(gpa));

Re: [Intel-gfx] [PATCH v2 4/5] drm/i915: master irq pvmmio optimization

2018-10-31 Thread Zhang, Xiaolin
Ping review. Thanks very much. 

BRs, Xiaolin

-Original Message-
From: Zhang, Xiaolin 
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
; Zhenyu Wang ; Wang, Zhi A 
; Chris Wilson ; Joonas 
Lahtinen ; He; He, Min ; 
Jiang; Jiang, Fei ; Gong; Gong, Zhipeng 
; Yuan; Yuan, Hang ; Lv, Zhiyuan 

Subject: [PATCH v2 4/5] drm/i915: master irq pvmmio optimization

The master irq register is accessed twice for every irq handled, and is thus trapped to SOS
very frequently. Optimize it by moving the master irq register to the shared page,
so writes do not need to be trapped.

When irqs need to be enabled to let SOS inject irqs in a timely manner, use another pvmmio register
to achieve this purpose. This avoids one trap when we disable the master irq.

Use PVMMIO_MASTER_IRQ to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv version callbacks for irq_{irq_handler, irq_preinstall, 
irq_postinstall}

Cc: Zhenyu Wang 
Cc: Zhi Wang 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: He, Min 
Cc: Jiang, Fei 
Cc: Gong, Zhipeng 
Cc: Yuan, Hang 
Cc: Zhiyuan Lv 
Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_irq.c| 82 --
 drivers/gpu/drm/i915/i915_pvinfo.h |  3 +-
 drivers/gpu/drm/i915/i915_vgpu.c   |  2 +-
 3 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c 
index 5d1f537..95ed2e7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2938,6 +2938,40 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
return IRQ_HANDLED;
 }
 
+static irqreturn_t gen8_irq_handler_pv(int irq, void *arg) {
+   struct drm_i915_private *dev_priv = to_i915(arg);
+   u32 master_ctl;
+   u32 gt_iir[4];
+
+   if (!intel_irqs_enabled(dev_priv))
+   return IRQ_NONE;
+
+   master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
+   master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
+   if (!master_ctl)
+   return IRQ_NONE;
+
+   dev_priv->vgpu.shared_page->disable_irq = 1;
+
+   /* Find, clear, then process each source of interrupt */
+   gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+
+   /* IRQs are synced during runtime_suspend, we don't require a wakeref */
+   if (master_ctl & ~GEN8_GT_IRQS) {
+   disable_rpm_wakeref_asserts(dev_priv);
+   gen8_de_irq_handler(dev_priv, master_ctl);
+   enable_rpm_wakeref_asserts(dev_priv);
+   }
+
+   dev_priv->vgpu.shared_page->disable_irq = 0;
+   __raw_i915_write32(dev_priv, vgtif_reg(check_pending_irq), 1);
+
+   gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
+
+   return IRQ_HANDLED;
+}
+
 struct wedge_me {
struct delayed_work work;
struct drm_i915_private *i915;
@@ -3626,13 +3660,11 @@ static void gen8_gt_irq_reset(struct drm_i915_private 
*dev_priv)
GEN8_IRQ_RESET_NDX(GT, 3);
 }
 
-static void gen8_irq_reset(struct drm_device *dev)
+static void gen8_irq_reset_common(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
int pipe;
 
-   gen8_master_intr_disable(dev_priv->regs);
-
gen8_gt_irq_reset(dev_priv);
 
I915_WRITE(EDP_PSR_IMR, 0x);
@@ -3651,6 +3683,22 @@ static void gen8_irq_reset(struct drm_device *dev)
ibx_irq_reset(dev_priv);
 }
 
+static void gen8_irq_reset(struct drm_device *dev) {
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   gen8_master_intr_disable(dev_priv->regs);
+   gen8_irq_reset_common(dev);
+}
+
+static void gen8_irq_reset_pv(struct drm_device *dev) {
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   dev_priv->vgpu.shared_page->disable_irq = 1;
+   gen8_irq_reset_common(dev);
+}
+
 static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv)  {
/* Disable RCS, BCS, VCS and VECS class engines. */ @@ -4262,7 +4310,7 
@@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
}
 }
 
-static int gen8_irq_postinstall(struct drm_device *dev)
+static int gen8_irq_postinstall_common(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
 
@@ -4275,11 +4323,32 @@ static int gen8_irq_postinstall(struct drm_device *dev)
if (HAS_PCH_SPLIT(dev_priv))
ibx_irq_postinstall(dev);
 
+   return 0;
+}
+
+static int gen8_irq_postinstall(struct drm_device *dev) {
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   gen8_irq_postinstall_common(dev);
+
gen8_master_intr_enable(dev_priv->regs);
 
return 0;
 }
 
+static int gen8_irq_postinstall_pv(struct drm_device *dev) {
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   gen8_irq_postinstall_common(dev);
+
+   dev_priv->vgpu.shared_page->disable_irq = 0;
+   __raw_i915_write32(

Re: [Intel-gfx] [PATCH v2 3/5] drm/i915: context submission pvmmio optimization

2018-10-31 Thread Zhang, Xiaolin
Ping review, thanks very much. 

BRs, Xiaolin 

-Original Message-
From: Zhang, Xiaolin 
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
; Zhenyu Wang ; Wang, Zhi A 
; Chris Wilson ; Joonas 
Lahtinen ; He; He, Min ; 
Jiang; Jiang, Fei ; Gong; Gong, Zhipeng 
; Yuan; Yuan, Hang ; Lv, Zhiyuan 

Subject: [PATCH v2 3/5] drm/i915: context submission pvmmio optimization

It is a performance optimization to reduce mmio trap numbers from 4 to
1 during ELSP port writing (context submission).

When submitting a context, the elsp_data[4] values are cached in the shared page;
the last elsp_data[0] port write will be trapped to gvt for real context
submission.

Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. to maximize code reuse, introduced 
2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in engine structure. pv version of submit_ports and 
preempt_context implemented.

Cc: Zhenyu Wang 
Cc: Zhi Wang 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: He, Min 
Cc: Jiang, Fei 
Cc: Gong, Zhipeng 
Cc: Yuan, Hang 
Cc: Zhiyuan Lv 
Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_vgpu.c|  2 +
 drivers/gpu/drm/i915/intel_lrc.c| 88 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index cb409d5..9870ea6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 
BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
+   dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
if (magic != VGT_MAGIC)
return;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 22b57b8..9e6ccf9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -460,6 +460,60 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);  }
 
+static void execlists_submit_ports_pv(struct intel_engine_cs *engine) {
+   struct intel_engine_execlists *execlists = >execlists;
+   struct execlist_port *port = execlists->port;
+   u32 __iomem *elsp =
+   engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+   u32 *elsp_data;
+   unsigned int n;
+   u32 descs[4];
+   int i = 0;
+
+   /*
+* ELSQ note: the submit queue is not cleared after being submitted
+* to the HW so we need to make sure we always clean it up. This is
+* currently ensured by the fact that we always write the same number
+* of elsq entries, keep this in mind before changing the loop below.
+*/
+   for (n = execlists_num_ports(execlists); n--; ) {
+   struct i915_request *rq;
+   unsigned int count;
+   u64 desc;
+
+   rq = port_unpack([n], );
+   if (rq) {
+   GEM_BUG_ON(count > !n);
+   if (!count++)
+   execlists_context_schedule_in(rq);
+   port_set([n], port_pack(rq, count));
+   desc = execlists_update_context(rq);
+   } else {
+   GEM_BUG_ON(!n);
+   desc = 0;
+   }
+   GEM_BUG_ON(i >= 4);
+   descs[i] = upper_32_bits(desc);
+   descs[i + 1] = lower_32_bits(desc);
+   i += 2;
+   }
+
+   spin_lock(>i915->vgpu.shared_page_lock);
+   elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+   *elsp_data = descs[0];
+   *(elsp_data + 1) = descs[1];
+   *(elsp_data + 2) = descs[2];
+   writel(descs[3], elsp);
+   spin_unlock(>i915->vgpu.shared_page_lock);
+
+   /* we need to manually load the submit queue */
+   if (execlists->ctrl_reg)
+   writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); }
+
 static bool ctx_single_port_submission(const struct intel_context *ce)  {
return (IS_ENABLED(CONFIG_DRM_I915_GVT) && @@ -497,7 +551,6 @@ static 
void inject_preempt_context(struct intel_engine_cs *engine)
 
GEM_BUG_ON(execlists->preempt_complete_status !=
   upper_32_bits(ce->lrc_desc));
-
/*
 * Switch to our empty preempt context so
 * the state of the GPU is known (idle).
@@ -516,6 +569,27 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
execlists_

Re: [Intel-gfx] [PATCH v2 1/5] drm/i915: introduced pv capability for vgpu

2018-10-31 Thread Zhang, Xiaolin
Ping review. Thanks very much. 

BRs
Xiaolin

-Original Message-
From: Zhang, Xiaolin 
Sent: Friday, October 19, 2018 3:27 PM
To: intel-gfx@lists.freedesktop.org
Cc: intel-gvt-...@lists.freedesktop.org; Zhang, Xiaolin 
; Zhenyu Wang ; Wang, Zhi A 
; Chris Wilson ; Joonas 
Lahtinen ; He; He, Min ; 
Jiang; Jiang, Fei ; Gong; Gong, Zhipeng 
; Yuan; Yuan, Hang ; Lv, Zhiyuan 

Subject: [PATCH v2 1/5] drm/i915: introduced pv capability for vgpu

This u32 pv_caps field is used to control the different level pvmmio feature 
for MMIO emulation in GVT.

This field is default zero, no pvmmio feature enabled.

It also adds the VGT_CAPS_PVMMIO capability BIT for the guest to check whether
GVTg can support the PV feature or not.

v0: RFC, introduced enable_pvmmio module parameter.
v1: addressed RFC comment to remove enable_pvmmio module parameter by pv 
capability check.
v2: rebase

Cc: Zhenyu Wang 
Cc: Zhi Wang 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: He, Min 
Cc: Jiang, Fei 
Cc: Gong, Zhipeng 
Cc: Yuan, Hang 
Cc: Zhiyuan Lv 
Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_drv.h| 11 +++
 drivers/gpu/drm/i915/i915_pvinfo.h | 17 -
 drivers/gpu/drm/i915/i915_vgpu.c   | 19 +--
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h 
index 3017ef0..7b2d7cb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,6 +56,7 @@
 
 #include "i915_params.h"
 #include "i915_reg.h"
+#include "i915_pvinfo.h"
 #include "i915_utils.h"
 
 #include "intel_bios.h"
@@ -1343,6 +1344,7 @@ struct i915_workarounds {  struct i915_virtual_gpu {
bool active;
u32 caps;
+   u32 pv_caps;
 };
 
 /* used in computing the new watermarks state */ @@ -2853,6 +2855,11 @@ static 
inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
return dev_priv->vgpu.active;
 }
 
+static inline bool intel_vgpu_has_pvmmio(struct drm_i915_private 
+*dev_priv) {
+   return dev_priv->vgpu.caps & VGT_CAPS_PVMMIO; }
+
 u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv,
  enum pipe pipe);
 void
@@ -3878,4 +3885,8 @@ static inline int intel_hws_csb_write_index(struct 
drm_i915_private *i915)
return I915_HWS_CSB_WRITE_INDEX;
 }
 
+#define PVMMIO_LEVEL_ENABLE(dev_priv, level)   \
+   (intel_vgpu_active(dev_priv) && intel_vgpu_has_pvmmio(dev_priv) \
+   && (dev_priv->vgpu.pv_caps & level))
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h 
b/drivers/gpu/drm/i915/i915_pvinfo.h
index eeaa3d5..26709e8 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,12 +49,26 @@ enum vgt_g2v_type {
VGT_G2V_MAX,
 };
 
+#define VGPU_PVMMIO(vgpu) vgpu_vreg_t(vgpu, vgtif_reg(enable_pvmmio))
+
 /*
  * VGT capabilities type
  */
 #define VGT_CAPS_FULL_48BIT_PPGTT  BIT(2)
 #define VGT_CAPS_HWSP_EMULATIONBIT(3)
 #define VGT_CAPS_HUGE_GTT  BIT(4)
+#define VGT_CAPS_PVMMIOBIT(5)
+
+/*
+ * define different levels of PVMMIO optimization  */ enum 
+pvmmio_levels {
+   PVMMIO_ELSP_SUBMIT = 0x1,
+   PVMMIO_PLANE_UPDATE = 0x2,
+   PVMMIO_PLANE_WM_UPDATE = 0x4,
+   PVMMIO_MASTER_IRQ = 0x8,
+   PVMMIO_PPGTT_UPDATE = 0x10,
+};
 
 struct vgt_if {
u64 magic;  /* VGT_MAGIC */
@@ -106,8 +120,9 @@ struct vgt_if {
 
u32 execlist_context_descriptor_lo;
u32 execlist_context_descriptor_hi;
+   u32 enable_pvmmio;
 
-   u32  rsv7[0x200 - 24];/* pad to one page */
+   u32  rsv7[0x200 - 25];/* pad to one page */
 } __packed;
 
 #define vgtif_reg(x) \
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 869cf4a..907bbd2 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -76,9 +76,24 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
}
 
dev_priv->vgpu.caps = __raw_i915_read32(dev_priv, vgtif_reg(vgt_caps));
-
dev_priv->vgpu.active = true;
-   DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
+
+   if (!intel_vgpu_has_pvmmio(dev_priv)) {
+   DRM_INFO("Virtual GPU for Intel GVT-g detected\n");
+   return;
+   }
+
+   /* If guest wants to enable pvmmio, it needs to enable it explicitly
+* through vgt_if interface, and then read back the enable state from
+* gvt layer.
+*/
+   __raw_i915_write32(dev_priv, vgtif_reg(enable_pvmmio),
+   dev_priv->vgpu.pv_caps);
+   dev_priv->vgpu.pv_caps = __raw_i915_read32(dev_priv,
+   vgtif_reg(enable_pvmmio));
+
+   DRM_INFO("Virtual GPU for Intel GVT-g detected with pvmmio 0x%x\n",
+ 

Re: [Intel-gfx] [PATCH v2 0/5] i915 pvmmio to improve GVTg performance

2018-10-24 Thread Zhang, Xiaolin
Would like to ask ping for review patch set v2. thanks very much.

BRs, Xiaolin


On 10/19/2018 03:27 PM, Zhang, Xiaolin wrote:
> To improve GVTg performance, it could reduce the mmio access trap
> numbers within guest driver in some certain scenarios since mmio
> access trap will introuduce vm exit/vm enter cost.
>
> the solution in this patch set is to setup a shared memory region
> which accessed both by guest and GVTg without trap cost. the shared
> memory region is allocated by guest driver and guest driver will
> pass the region's memory guest physical address to GVTg through
> PVINFO register and later GVTg can access this region directly without
> trap cost to achieve data exchange purpose between guest and GVTg.
>
> in this patch set, 3 kind of pvmmio optimization implemented which is
> controlled by enable_pvmmio PVINO register with different level flag.
> 1. workload submission (context submission): reduce 4 traps to 1 trap.
> 2. master irq: reduce 2 traps to 1 trap. 
> 3. ppgtt update: eliminate the cost of ppgtt write protection. 
>
> based on the experiment, the performance was gained 4 percent (average)
> improvment with regard to both media and 3D workload benchmarks.
>
> based on the pvmmio framework, it could achive more sceneario optimization
> such as globle GTT update, display plane and water mark update with guest.
>
> v0: RFC patch set
> v1: addressed RFC review comments
> v2: addressed v1 review comments, added pv callbacks for pv operations
>
> Xiaolin Zhang (5):
>   drm/i915: introduced pv capability for vgpu
>   drm/i915: get ready of memory for pvmmio
>   drm/i915: context submission pvmmio optimization
>   drm/i915: master irq pvmmio optimization
>   drm/i915: ppgtt update pvmmio optimization
>
>  drivers/gpu/drm/i915/i915_drv.c |  2 +
>  drivers/gpu/drm/i915/i915_drv.h | 15 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +
>  drivers/gpu/drm/i915/i915_irq.c | 82 --
>  drivers/gpu/drm/i915/i915_pvinfo.h  | 43 +++-
>  drivers/gpu/drm/i915/i915_vgpu.c| 44 -
>  drivers/gpu/drm/i915/intel_lrc.c| 88 
> +++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>  8 files changed, 333 insertions(+), 11 deletions(-)
>

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [v1 10/10] drm/i915/gvt: GVTg support ppgtt pvmmio optimization

2018-10-14 Thread Zhang, Xiaolin
On 10/11/2018 04:07 PM, Zhao, Yakui wrote:
>
> On 2018年10月11日 14:14, Xiaolin Zhang wrote:
>> This patch handles ppgtt update from g2v notification.
>>
>> It read out ppgtt pte entries from guest pte tables page and
>> convert them to host pfns.
>>
>> It creates local ppgtt tables and insert the content pages
>> into the local ppgtt tables directly, which does not track
>> the usage of guest page table and removes the cost of write
>> protection from the original shadow page mechansim.
> It is possible that Guest VGPU writes the ppgtt entry by using 2M/64K 
> page mode.
>
> If so, the gvtg should also handle it in PVMMIO mode.
It is possible that a guest vgpu can support huge page mode. Currently this
is a gap for pv ppgtt, since this feature is valid only in non-huge-page
mode. Support for guest huge page mode is WIP.
BRs, Xiaolin
>> v1: rebase
>> v0: RFC
>>
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>   drivers/gpu/drm/i915/gvt/gtt.c  | 318 
>> 
>>   drivers/gpu/drm/i915/gvt/gtt.h  |   9 +
>>   drivers/gpu/drm/i915/gvt/handlers.c |  13 +-
>>   3 files changed, 338 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>> index 58e166e..8d3e21a 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>> @@ -1744,6 +1744,26 @@ static int ppgtt_handle_guest_write_page_table_bytes(
>>  return 0;
>>   }
>>   
>> +static void invalidate_mm_pv(struct intel_vgpu_mm *mm)
>> +{
>> +struct intel_vgpu *vgpu = mm->vgpu;
>> +struct intel_gvt *gvt = vgpu->gvt;
>> +struct intel_gvt_gtt *gtt = >gtt;
>> +struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
>> +struct intel_gvt_gtt_entry se;
>> +
>> +i915_ppgtt_close(>ppgtt->vm);
>> +i915_ppgtt_put(mm->ppgtt);
>> +
>> +ppgtt_get_shadow_root_entry(mm, , 0);
>> +if (!ops->test_present())
>> +return;
>> +se.val64 = 0;
>> +ppgtt_set_shadow_root_entry(mm, , 0);
>> +
>> +mm->ppgtt_mm.shadowed  = false;
>> +}
>> +
>>   static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
>>   {
>>  struct intel_vgpu *vgpu = mm->vgpu;
>> @@ -1756,6 +1776,11 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm 
>> *mm)
>>  if (!mm->ppgtt_mm.shadowed)
>>  return;
>>   
>> +if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE) {
>> +invalidate_mm_pv(mm);
>> +return;
>> +}
>> +
>>  for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
>>  ppgtt_get_shadow_root_entry(mm, , index);
>>   
>> @@ -1773,6 +1798,26 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm 
>> *mm)
>>  mm->ppgtt_mm.shadowed = false;
>>   }
>>   
>> +static int shadow_mm_pv(struct intel_vgpu_mm *mm)
>> +{
>> +struct intel_vgpu *vgpu = mm->vgpu;
>> +struct intel_gvt *gvt = vgpu->gvt;
>> +struct intel_gvt_gtt_entry se;
>> +
>> +mm->ppgtt = i915_ppgtt_create(gvt->dev_priv, NULL);
>> +if (IS_ERR(mm->ppgtt)) {
>> +gvt_vgpu_err("fail to create ppgtt for pdp 0x%llx\n",
>> +px_dma(>ppgtt->pml4));
>> +return PTR_ERR(mm->ppgtt);
>> +}
>> +
>> +se.type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
>> +se.val64 = px_dma(>ppgtt->pml4);
>> +ppgtt_set_shadow_root_entry(mm, , 0);
>> +mm->ppgtt_mm.shadowed  = true;
>> +
>> +return 0;
>> +}
>>   
>>   static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
>>   {
>> @@ -1787,6 +1832,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
>>  if (mm->ppgtt_mm.shadowed)
>>  return 0;
>>   
>> +if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE)
>> +return shadow_mm_pv(mm);
>> +
>>  mm->ppgtt_mm.shadowed = true;
>>   
>>  for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
>> @@ -2767,3 +2815,273 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
>>  intel_vgpu_destroy_all_ppgtt_mm(vgpu);
>>  intel_vgpu_reset_ggtt(vgpu, true);
>>   }
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(struct intel_vgpu *vgpu,
>> +u64 pdps[])
>> +{
>> +struct intel_vgpu_mm *mm;
>> +int ret = 0;
>> +u32 offset;
>> +struct pv_ppgtt_update pv_ppgtt;
>> +
>> +offset = offsetof(struct gvt_shared_page, pv_ppgtt);
>> +intel_gvt_read_shared_page(vgpu, offset, _ppgtt, sizeof(pv_ppgtt));
>> +
>> +mm = intel_vgpu_find_ppgtt_mm(vgpu, _ppgtt.pdp);
>> +if (!mm) {
>> +gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp);
>> +ret = -EINVAL;
>> +} else {
>> +ret = mm->ppgtt->vm.allocate_va_range(>ppgtt->vm,
>> +pv_ppgtt.start, pv_ppgtt.length);
>> +if (ret)
>> +gvt_vgpu_err("failed to alloc %llx\n", pv_ppgtt.pdp);
>> +}
>> +
>> +return ret;
>> +}
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_clear_4lvl(struct intel_vgpu *vgpu,
>> +u64 pdps[])
>> +{
>> +struct 

Re: [Intel-gfx] [v1 03/10] drm/i915: context submission pvmmio optimization

2018-10-14 Thread Zhang, Xiaolin
On 10/11/2018 05:12 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2018-10-11 07:14:05)
>> It is performance optimization to reduce mmio trap numbers from 4 to
>> 1 durning ELSP porting writing (context submission).
>>
>> When context subission, to cache elsp_data[4] values in
>> the shared page, the last elsp_data[0] port writing will be trapped
>> to gvt for real context submission.
>>
>> Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.
>>
>> v1: rebase
>> v0: RFC
>>
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_vgpu.c |  2 ++
>>  drivers/gpu/drm/i915/intel_lrc.c | 37 -
> Hint: intel_vgpu_submission.c and go wild. You do not need to emulate
> execlists at all, an async interface along the lines of guc would
> strangely enough be more akin to what you want.
> -Chris
>
I can't understand your comment very well. So far, vgpu supports execlist
workload submission only; this pv optimization is valid only for execlist
submission and can't support GuC.

BRs, Xiaolin

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [v1 02/10] drm/i915: get ready of memory for pvmmio

2018-10-14 Thread Zhang, Xiaolin
On 10/11/2018 04:04 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2018-10-11 07:24:19)
>> To enable pvmmio feature, we need to prepare one 4K shared page
>> which will be accessed by both guest and backend i915 driver.
>>
>> guest i915 allocate one page memory and then the guest physical address is
>> passed to backend i915 driver through PVINFO register so that backend i915
>> driver can access this shared page without hypeviser trap cost for shared
>> data exchagne via hyperviser read_gpa functionality.
>>
>> v1: addressed RFC comment to move both shared_page_lock and shared_page
>> to i915_virtual_gpu structure
>> v0: RFC
>>
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_drv.c|  8 
>>  drivers/gpu/drm/i915/i915_drv.h|  2 ++
>>  drivers/gpu/drm/i915/i915_pvinfo.h | 24 +++-
>>  drivers/gpu/drm/i915/i915_vgpu.c   | 19 ++-
>>  4 files changed, 51 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 19302342..9b25bb7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -987,6 +987,10 @@ static void i915_mmio_cleanup(struct drm_i915_private 
>> *dev_priv)
>>  
>> intel_teardown_mchbar(dev_priv);
>> pci_iounmap(pdev, dev_priv->regs);
>> +   if (intel_vgpu_active(dev_priv) && dev_priv->vgpu.shared_page) {
>> +   free_page((unsigned long)dev_priv->vgpu.shared_page);
>> +   dev_priv->vgpu.shared_page = NULL;
>> +   }
> intel_vgpu_(teardown,cleanup,fini)
>
> setting to NULL? fetch_and_zero() if you must, but here it is a little
> pointless.
I agree with you that it is a little pointless to set the value to NULL. I will
move it to intel_gvt_cleanup(), which acts as a kind of vgpu cleanup gatekeeper.
BRs, Xiaolin
>>  }
>>  
>>  /**
>> @@ -1029,6 +1033,10 @@ static int i915_driver_init_mmio(struct 
>> drm_i915_private *dev_priv)
>> return 0;
>>  
>>  err_uncore:
>> +   if (intel_vgpu_active(dev_priv) && dev_priv->vgpu.shared_page) {
>> +   free_page((unsigned long)dev_priv->vgpu.shared_page);
>> +   dev_priv->vgpu.shared_page = NULL;
> Same again. Remember that shared_page will never be !0 unless active
> anyway.
>
>> +   }
>> intel_uncore_fini(dev_priv);
>>  err_bridge:
>> pci_dev_put(dev_priv->bridge_dev);
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index d22154a..2c131d4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1345,6 +1345,8 @@ struct i915_virtual_gpu {
>> bool active;
>> u32 caps;
>> u32 pv_caps;
>> +   spinlock_t shared_page_lock;
>> +   struct gvt_shared_page *shared_page;
> Ugh, be considerate of struct packing and/or cachelines.
Thanks your comment, will consider this in next version.
>>  };
>>  
>>  /* used in computing the new watermarks state */
>> diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h 
>> b/drivers/gpu/drm/i915/i915_pvinfo.h
>> index 26709e8..179d558 100644
>> --- a/drivers/gpu/drm/i915/i915_pvinfo.h
>> +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
>> @@ -49,6 +49,24 @@ enum vgt_g2v_type {
>> VGT_G2V_MAX,
>>  };
>>  
>> +struct pv_ppgtt_update {
>> +   u64 pdp;
>> +   u64 start;
>> +   u64 length;
>> +   u32 cache_level;
>> +};
> Not used. Only introduce interfaces as they are being used, or just
> before.
it is used in gvt_shared_page structure.
>
>> +/*
>> + * shared page(4KB) between gvt and VM, could be allocated by guest driver
>> + * or a fixed location in PCI bar 0 region
>> + */
>> +struct gvt_shared_page {
>> +   u32 elsp_data[4];
>> +   u32 reg_addr;
>> +   u32 disable_irq;
>> +   struct pv_ppgtt_update pv_ppgtt;
>> +};
>> +
>>  #define VGPU_PVMMIO(vgpu) vgpu_vreg_t(vgpu, vgtif_reg(enable_pvmmio))
>>  
>>  /*
>> @@ -121,8 +139,12 @@ struct vgt_if {
>> u32 execlist_context_descriptor_lo;
>> u32 execlist_context_descriptor_hi;
>> u32 enable_pvmmio;
>> +   struct {
>> +   u32 lo;
>> +   u32 hi;
>> +   } shared_page_gpa;
>>  
>> -   u32  rsv7[0x200 - 25];/* pad to one page */
>> +   u32  rsv7[0x200 - 27];/* pad to one page */
>>  } __packed;
>>  
>>  #define vgtif_reg(x) \
>> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c 
>> b/drivers/gpu/drm/i915/i915_vgpu.c
>> index 907bbd2..609eefe 100644
>> --- a/drivers/gpu/drm/i915/i915_vgpu.c
>> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
>> @@ -62,6 +62,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
>>  {
>> u64 magic;
>> u16 version_major;
>> +   u64 shared_page_gpa;
> Scope.
can't understand your meaning here, could you explain a little bit more?
BRs, Xiaolin
>
>> BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
>>  
>> @@ -91,7 +92,23 @@ void i915_check_vgpu(struct drm_i915_private 

Re: [Intel-gfx] [RFC 01/10] drm/i915/gvt: add module parameter enable_pvmmio

2018-10-10 Thread Zhang, Xiaolin
On 10/09/2018 10:34 AM, Zhenyu Wang wrote:
> On 2018.09.28 14:09:45 +0800, Zhang, Xiaolin wrote:
>> On 09/27/2018 07:03 PM, Joonas Lahtinen wrote:
>>> Quoting Xiaolin Zhang (2018-09-27 19:37:46)
>>>> This int type module parameter is used to control the different
>>>> level pvmmio feature for MMIO emulation in GVT.
>>>>
>>>> This parameter is default zero, no pvmmio feature enabled.
>>>>
>>>> Its permission type is 0400 which means user could only change its
>>>> value through the cmdline, this is to prevent the dynamic modification
>>>> during runtime which would break the pvmmio internal logic.
>>>>
>>>> Signed-off-by: Xiaolin Zhang 
>>> This shouldn't really be a module parameter. We should detect the
>>> capability from the vGPU device and use it always when possible.
>>>
>>> Regards, Joonas
>>>
>> for pv optimization, we should touch both guest driver and GVTg.  this
>> parameter is used for
>>
>> guest pv capability because GVTg with pv capability will support both pv
>> and non pv capability guest.
>>
> That's the purpose of 'vgt_caps' in PVINFO to do capability check between
> host/guest. You need a new cap bit definition for PVMMIO and maybe another
> field for different PVMMIO level capability check. New parameter is not 
> useful here.
>
> Thanks
>
Sounds good to me. Will use it in the next version. Thanks very much.

BRs, Xiaolin

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 02/10] drm/i915/gvt: get ready of memory for pvmmio

2018-09-28 Thread Zhang, Xiaolin
On 09/27/2018 03:18 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2018-09-27 17:37:47)
>> To enable pvmmio feature, we need to prepare one 4K shared page
>> which will be accessed by both guest and backend i915 driver.
>>
>> guest i915 allocate one page memory and then the guest physical address is
>> passed to backend i915 driver through PVINFO register so that backend i915
>> driver can access this shared page without hypeviser trap cost for shared
>> data exchagne via hyperviser read_gpa functionality.
>>
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_drv.c|  5 +
>>  drivers/gpu/drm/i915/i915_drv.h|  3 +++
>>  drivers/gpu/drm/i915/i915_pvinfo.h | 25 -
>>  drivers/gpu/drm/i915/i915_vgpu.c   | 17 +
>>  4 files changed, 49 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index ade9bca..815a4dd 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -885,6 +885,7 @@ static int i915_driver_init_early(struct 
>> drm_i915_private *dev_priv)
>> return -ENODEV;
>>  
>> spin_lock_init(_priv->irq_lock);
>> +   spin_lock_init(_priv->shared_page_lock);
> No. Do we not have a more appropriate struct for this to find a home in.
> No one will ever uess that 'shared_page_lock' refers to vgpu.
> -Chris
thanks your comment. I just find another place "struct i915_virtual_gpu"
and guess it is better than this, do you think it is right place for them?

BRs, Xiaolin
> ___
> intel-gvt-dev mailing list
> intel-gvt-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
>

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 01/10] drm/i915/gvt: add module parameter enable_pvmmio

2018-09-28 Thread Zhang, Xiaolin
On 09/27/2018 07:03 PM, Joonas Lahtinen wrote:
> Quoting Xiaolin Zhang (2018-09-27 19:37:46)
>> This int type module parameter is used to control the different
>> level pvmmio feature for MMIO emulation in GVT.
>>
>> This parameter is default zero, no pvmmio feature enabled.
>>
>> Its permission type is 0400 which means user could only change its
>> value through the cmdline, this is to prevent the dynamic modification
>> during runtime which would break the pvmmio internal logic.
>>
>> Signed-off-by: Xiaolin Zhang 
> This shouldn't really be a module parameter. We should detect the
> capability from the vGPU device and use it always when possible.
>
> Regards, Joonas
>
for pv optimization, we should touch both guest driver and GVTg.  this
parameter is used for

guest pv capability because GVTg with pv capability will support both pv
and non pv capability guest.

BRs, Xiaolin

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 00/10] i915 pvmmio to improve GVTg performance

2018-09-28 Thread Zhang, Xiaolin
On 09/27/2018 07:07 PM, Joonas Lahtinen wrote:
> Quoting Xiaolin Zhang (2018-09-27 19:37:45)
>> To improve GVTg performance, it could reduce the mmio access trap
>> numbers within guest driver in some certain scenarios since mmio
>> access trap will introuduce vm exit/vm enter cost.
>>
>> the solution in this patch set is to setup a shared memory region
>> which accessed both by guest and GVTg without trap cost. the shared
>> memory region is allocated by guest driver and guest driver will
>> pass the region's memory guest physical address to GVTg through
>> PVINFO register and later GVTg can access this region directly without
>> trap cost to achieve data exchange purpose between guest and GVTg.
>>
>> in this patch set, 3 kind of pvmmio optimization implemented which is
>> controlled by enable_pvmmio PVINO register with different level flag.
>> 1. workload submission (context submission): reduce 4 traps to 1 trap.
>> 2. master irq: reduce 2 traps to 1 trap. 
>> 3. ppgtt update: eliminate the cost of ppgtt write protection. 
>>
>> based on the experiment, the performance was gained 4 percent (average)
>> improvment with regard to both media and 3D workload benchmarks. 
>>
>> based on the pvmmio framework, it could achive more sceneario optimization
>> such as globle GTT update, display plane and water mark update with guest.
> Overall comments:
>
> The patches should be properly prefixed and split down. We should have
> "drm/i915:" patches that touch i915 portions, and those should not touch
> any gvt parts. Then there should be "drm/i915/gvt:" parts which don't
> touch anything from i915, and would be reviewed in the GVT list.
>
> We'd then proceed to merge the i915 changes and the GVT changes would be
> merged in the GVT tree.
>
> Regards, Joonas
>
thanks your comment, it makes sense and will be addressed in next version.

BRs, Xiaolin



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 03/10] drm/i915/gvt: context submission pvmmio optimization

2018-09-27 Thread Zhang, Xiaolin
On 09/27/2018 03:21 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2018-09-27 17:37:48)
>> It is performance optimization to reduce mmio trap numbers from 4 to
>> 1 durning ELSP porting writing (context submission).
>>
>> When context subission, to cache elsp_data[4] values in
>> the shared page, the last elsp_data[0] port writing will be trapped
>> to gvt for real context submission.
>>
>> Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.
>> Signed-off-by: Xiaolin Zhang 
>> ---
>>  drivers/gpu/drm/i915/i915_params.h |  2 +-
>>  drivers/gpu/drm/i915/intel_lrc.c   | 37 
>> -
>>  2 files changed, 37 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_params.h 
>> b/drivers/gpu/drm/i915/i915_params.h
>> index 0f6a38b..6c81c87 100644
>> --- a/drivers/gpu/drm/i915/i915_params.h
>> +++ b/drivers/gpu/drm/i915/i915_params.h
>> @@ -69,7 +69,7 @@
>> param(bool, enable_dp_mst, true) \
>> param(bool, enable_dpcd_backlight, false) \
>> param(bool, enable_gvt, false) \
>> -   param(int, enable_pvmmio, 0)
>> +   param(int, enable_pvmmio, PVMMIO_ELSP_SUBMIT)
>>  
>>  #define MEMBER(T, member, ...) T member;
>>  struct i915_params {
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 4b28225..cdc713c 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -451,7 +451,12 @@ static void execlists_submit_ports(struct 
>> intel_engine_cs *engine)
>>  {
>> struct intel_engine_execlists *execlists = >execlists;
>> struct execlist_port *port = execlists->port;
>> +   u32 __iomem *elsp =
>> +   engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
>> +   u32 *elsp_data;
>> unsigned int n;
>> +   u32 descs[4];
>> +   int i = 0;
>>  
>> /*
>>  * We can skip acquiring intel_runtime_pm_get() here as it was taken
>> @@ -494,8 +499,24 @@ static void execlists_submit_ports(struct 
>> intel_engine_cs *engine)
>> GEM_BUG_ON(!n);
>> desc = 0;
>> }
>> +   if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
>> +   GEM_BUG_ON(i >= 4);
>> +   descs[i] = upper_32_bits(desc);
>> +   descs[i + 1] = lower_32_bits(desc);
>> +   i += 2;
>> +   } else {
>> +   write_desc(execlists, desc, n);
>> +   }
>> +   }
>>  
>> -   write_desc(execlists, desc, n);
>> +   if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
>> +   spin_lock(>i915->shared_page_lock);
>> +   elsp_data = engine->i915->shared_page->elsp_data;
>> +   *elsp_data = descs[0];
>> +   *(elsp_data + 1) = descs[1];
>> +   *(elsp_data + 2) = descs[2];
>> +   writel(descs[3], elsp);
>> +   spin_unlock(>i915->shared_page_lock);
>> }
>>  
>> /* we need to manually load the submit queue */
>> @@ -538,11 +559,25 @@ static void inject_preempt_context(struct 
>> intel_engine_cs *engine)
>> struct intel_engine_execlists *execlists = >execlists;
>> struct intel_context *ce =
>> to_intel_context(engine->i915->preempt_context, engine);
>> +   u32 __iomem *elsp =
>> +   engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
>> +   u32 *elsp_data;
>> unsigned int n;
>>  
>> GEM_BUG_ON(execlists->preempt_complete_status !=
>>upper_32_bits(ce->lrc_desc));
>>  
>> +   if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
>> +   spin_lock(>i915->shared_page_lock);
>> +   elsp_data = engine->i915->shared_page->elsp_data;
>> +   *elsp_data = 0;
>> +   *(elsp_data + 1) = 0;
>> +   *(elsp_data + 2) = upper_32_bits(ce->lrc_desc);
>> +   writel(lower_32_bits(ce->lrc_desc), elsp);
>> +   spin_unlock(>i915->shared_page_lock);
>> +   return;
>> +   }
>> +
> Really?
> -Chris
In the normal case, write_desc() will update the port[0] & port[1]
descriptors, and in total 4 MMIO accesses will be trapped.
In the PVMMIO_ELSP_SUBMIT case, descs[0] ~ descs[3] are cached in
elsp_data[] in the shared_page, so only one MMIO access,
"writel(descs[3], elsp)", traps to GVT-g for workload submission; in
GVT-g, descs[0] ~ descs[2] are then read back from the shared_page.
>

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1] drm/i915: Enhanced for initialize partially filled pagetables

2017-09-29 Thread Zhang, Xiaolin
On 09/28/2017 10:25 PM, Joonas Lahtinen wrote:

On Thu, 2017-09-28 at 10:09 +0800, Xiaolin Zhang wrote:


If vgpu is active, the page table entries should be initialized after
allocation so that the hypervisor can pin the pages successfully;
otherwise the hypervisor will fail to pin the pages and the host will print
a lot of annoying errors such as “ERROR gvt: guest page write error -22,
gfn 0x7ada8, pa 0x7ada89a8, var 0x6, len 1” when creating a Linux guest.

Signed-off-by: Xiaolin Zhang 



Why does the hypervisor try to access the entries prior to them being
made valid for hardware?

Regards, Joonas


Hi Joonas,

Thanks for your comment.

I think what you are asking about is the point where we get the error
message in GVT: the current GVT implementation assumes that a page under
write protection, when trapped, is valid, with a correct shadow page set
up and a p2m translation in place. Actually, to work with the "initialize
partially filled pagetables" change, there is some refinement work to do
on the GVT side (maybe small, maybe large). But until that refinement
work is done, this patch tries to bring GVT behavior back to what it was
before the "initialize partially filled pagetables" patch.

BRs, Xiaolin
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx