[Intel-gfx] [PATCH v10 4/9] drm/i915: vgpu ppgtt update pv optimization

2019-09-16 Thread Xiaolin Zhang
This patch extends vgpu ppgtt g2v notification to notify host
GVT-g of ppgtt update from guest including alloc_4lvl, clear_4lv4
and insert_4lvl.

These updates use the shared memory page to pass struct pv_ppgtt_update
from guest to GVT which is used for pv optimiation implemeation within
host GVT side.

This patch also add one new pv_caps level to control ppgtt update.

Use PV_PPGTT_UPDATE to control this level of pv optimization.

v0: RFC.
v1: rebased.
v2: added pv callbacks for vm.{allocate_va_range, insert_entries,
clear_range} within ppgtt.
v3: rebased, disable huge page ppgtt support when using PVMMIO ppgtt
update due to complex and performance impact.
v4: moved alloc/insert/clear_4lvl pv callbacks into i915_vgpu_pv.c and
added a single intel_vgpu_config_pv_caps() for vgpu pv callbacks setup.
v5: rebase.
v6: rebase.
v7: rebase.

Signed-off-by: Xiaolin Zhang 
---
 drivers/gpu/drm/i915/i915_gem.c |  3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  8 
 drivers/gpu/drm/i915/i915_vgpu.c| 79 +
 drivers/gpu/drm/i915/i915_vgpu.h| 25 
 5 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2da9544..1ade8e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1309,7 +1309,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
int ret;
 
/* We need to fallback to 4K pages if host doesn't support huge gtt. */
-   if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
+   if ((intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
+   || intel_vgpu_enabled_pv_caps(dev_priv, PV_PPGTT_UPDATE))
mkwrite_device_info(dev_priv)->page_sizes =
I915_GTT_PAGE_SIZE_4K;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e62e9d1..00b187a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1015,7 +1015,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * 
const vm,
return start;
 }
 
-static void gen8_ppgtt_clear(struct i915_address_space *vm,
+void gen8_ppgtt_clear(struct i915_address_space *vm,
 u64 start, u64 length)
 {
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
@@ -1126,7 +1126,7 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * 
const vm,
return ret;
 }
 
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+int gen8_ppgtt_alloc(struct i915_address_space *vm,
u64 start, u64 length)
 {
u64 from;
@@ -1326,7 +1326,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
} while (iter->sg);
 }
 
-static void gen8_ppgtt_insert(struct i915_address_space *vm,
+void gen8_ppgtt_insert(struct i915_address_space *vm,
  struct i915_vma *vma,
  enum i915_cache_level cache_level,
  u32 flags)
@@ -1517,8 +1517,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct 
drm_i915_private *i915)
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
ppgtt->vm.clear_range = gen8_ppgtt_clear;
 
-   if (intel_vgpu_active(i915))
+   if (intel_vgpu_active(i915)) {
+   intel_vgpu_config_pv_caps(i915, PV_PPGTT_UPDATE, ppgtt);
gen8_ppgtt_notify_vgt(ppgtt, true);
+   }
 
ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8fd2234..f5536bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -588,6 +588,14 @@ static inline void i915_vm_put(struct i915_address_space 
*vm)
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
 
+void gen8_ppgtt_clear(struct i915_address_space *vm,
+   u64 start, u64 length);
+void gen8_ppgtt_insert(struct i915_address_space *vm,
+   struct i915_vma *vma,
+   enum i915_cache_level cache_level, u32 flags);
+int gen8_ppgtt_alloc(struct i915_address_space *vm,
+   u64 start, u64 length);
+
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 6e29a52..e458892 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -96,6 +96,9 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
dev_priv->vgpu.active = true;
mutex_init(&dev_priv->vgpu.lock);
 
+   /* guest driver PV capability */
+   dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
+
if (!intel_vgpu_check_pv_caps(dev_priv, shared_ar

Re: [Intel-gfx] [PATCH v10 4/9] drm/i915: vgpu ppgtt update pv optimization

2019-09-17 Thread Chris Wilson
Quoting Xiaolin Zhang (2019-09-17 06:48:15)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index e62e9d1..00b187a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1015,7 +1015,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space 
> * const vm,
> return start;
>  }
>  
> -static void gen8_ppgtt_clear(struct i915_address_space *vm,
> +void gen8_ppgtt_clear(struct i915_address_space *vm,
>  u64 start, u64 length)
>  {
> GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> @@ -1126,7 +1126,7 @@ static int __gen8_ppgtt_alloc(struct i915_address_space 
> * const vm,
> return ret;
>  }
>  
> -static int gen8_ppgtt_alloc(struct i915_address_space *vm,
> +int gen8_ppgtt_alloc(struct i915_address_space *vm,
> u64 start, u64 length)
>  {
> u64 from;
> @@ -1326,7 +1326,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
> } while (iter->sg);
>  }
>  
> -static void gen8_ppgtt_insert(struct i915_address_space *vm,
> +void gen8_ppgtt_insert(struct i915_address_space *vm,
>   struct i915_vma *vma,
>   enum i915_cache_level cache_level,
>   u32 flags)

I think it is unwise to export these. I would suggest focusing on
wrapping the function pointers, for which we need to improve the
constructors.

> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c 
> b/drivers/gpu/drm/i915/i915_vgpu.c
> index 6e29a52..e458892 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.c
> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> @@ -96,6 +96,9 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
> dev_priv->vgpu.active = true;
> mutex_init(&dev_priv->vgpu.lock);
>  
> +   /* guest driver PV capability */
> +   dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
> +
> if (!intel_vgpu_check_pv_caps(dev_priv, shared_area)) {
> DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
> goto out;
> @@ -322,6 +325,82 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
>   * i915 vgpu PV support for Linux
>   */
>  
> +static int vgpu_ppgtt_pv_update(struct drm_i915_private *dev_priv,
> +   u32 action, u64 pdp, u64 start, u64 length, u32 cache_level)
> +{
> +   u32 data[8];
> +
> +   data[0] = action;
> +   data[1] = lower_32_bits(pdp);
> +   data[2] = upper_32_bits(pdp);
> +   data[3] = lower_32_bits(start);
> +   data[4] = upper_32_bits(start);
> +   data[5] = lower_32_bits(length);
> +   data[6] = upper_32_bits(length);
> +   data[7] = cache_level;
> +
> +   return intel_vgpu_pv_send(dev_priv, data, ARRAY_SIZE(data));
> +}
> +
> +static void gen8_ppgtt_clear_4lvl_pv(struct i915_address_space *vm,
> +   u64 start, u64 length)
> +{
> +   struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +   struct drm_i915_private *dev_priv = vm->i915;
> +
> +   gen8_ppgtt_clear(vm, start, length);
> +   vgpu_ppgtt_pv_update(dev_priv, PV_ACTION_PPGTT_L4_CLEAR,
> +   px_dma(ppgtt->pd), start, length, 0);
> +}
> +
> +static void gen8_ppgtt_insert_4lvl_pv(struct i915_address_space *vm,
> +   struct i915_vma *vma,
> +   enum i915_cache_level cache_level, u32 flags)
> +{
> +   struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +   struct drm_i915_private *dev_priv = vm->i915;
> +   u64 start = vma->node.start;
> +   u64 length = vma->node.size;
> +
> +   gen8_ppgtt_insert(vm, vma, cache_level, flags);
> +   vgpu_ppgtt_pv_update(dev_priv, PV_ACTION_PPGTT_L4_INSERT,
> +   px_dma(ppgtt->pd), start, length, cache_level);
> +}
> +
> +static int gen8_ppgtt_alloc_4lvl_pv(struct i915_address_space *vm,
> +   u64 start, u64 length)
> +{
> +   struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +   struct drm_i915_private *dev_priv = vm->i915;
> +   int ret;
> +
> +   ret = gen8_ppgtt_alloc(vm, start, length);
> +   if (ret)
> +   return ret;
> +
> +   return vgpu_ppgtt_pv_update(dev_priv, PV_ACTION_PPGTT_L4_ALLOC,
> +   px_dma(ppgtt->pd), start, length, 0);

Unbalanced error path.

> +}
> +
> +/*
> + * config guest driver PV ops for different PV features
> + */
> +void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
> +   enum pv_caps cap, void *data)
> +{
> +   struct i915_ppgtt *ppgtt;
> +
> +   if (!intel_vgpu_enabled_pv_caps(dev_priv, cap))
> +   return;
> +
> +   if (cap == PV_PPGTT_UPDATE) {
> +   ppgtt = (struct i915_ppgtt *)data;
> +   ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl_pv;
> +   ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
> +   ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
> +   }

I'd rather we j