Re: [PATCH v6 7/9] drm/i915: Reduce the number of objects subject to memcpy recover

2021-09-23 Thread Thomas Hellström



On 9/23/21 11:44 AM, Matthew Auld wrote:

On 22/09/2021 07:25, Thomas Hellström wrote:

We really only need memcpy restore for objects that affect the
operability of the migrate context. That is, primarily the page-table
objects of the migrate VM.

Add an object flag, I915_BO_ALLOC_PM_EARLY for objects that need early
restores using memcpy and a way to assign LMEM page-table object flags
to be used by the vms.

Restore objects without this flag with the gpu blitter and only objects
carrying the flag using TTM memcpy.

Initially mark the migrate, gt, gtt and vgpu vms to use this flag, and
defer for a later audit which vms actually need it. Most importantly, 
user-

allocated vms with pinned page-table objects can be restored using the
blitter.

Performance-wise memcpy restore is probably as fast as gpu restore if 
not

faster, but using gpu restore will help tackling future restrictions in
mappable LMEM size.

v4:
- Don't mark the aliasing ppgtt page table flags for early resume, but
   rather the ggtt page table flags as intended. (Matthew Auld)
- The check for user buffer objects during early resume is pointless, 
since

   they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld)
v5:
- Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them 
restored

   before we fire up the migrate context.

Cc: Matthew Brost 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Auld 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c  |  4 ++--
  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  9 ++---
  drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  6 +-
  drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c   |  5 +++--
  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  |  2 +-
  drivers/gpu/drm/i915/gt/gen6_ppgtt.c |  2 +-
  drivers/gpu/drm/i915/gt/gen8_ppgtt.c |  5 +++--
  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 +++-
  drivers/gpu/drm/i915/gt/intel_ggtt.c |  3 ++-
  drivers/gpu/drm/i915/gt/intel_gt.c   |  2 +-
  drivers/gpu/drm/i915/gt/intel_gtt.c  |  3 ++-
  drivers/gpu/drm/i915/gt/intel_gtt.h  |  9 +++--
  drivers/gpu/drm/i915/gt/intel_migrate.c  |  2 +-
  drivers/gpu/drm/i915/gt/intel_ppgtt.c    | 13 -
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c |  2 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc.c   |  3 ++-
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c |  7 +--
  drivers/gpu/drm/i915/gvt/scheduler.c |  2 +-
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c    |  4 ++--
  19 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index c2ab0e22db0a..8208fd5b72c3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1287,7 +1287,7 @@ i915_gem_create_context(struct drm_i915_private 
*i915,

  } else if (HAS_FULL_PPGTT(i915)) {
  struct i915_ppgtt *ppgtt;
  -    ppgtt = i915_ppgtt_create(>gt);
+    ppgtt = i915_ppgtt_create(>gt, 0);
  if (IS_ERR(ppgtt)) {
  drm_dbg(>drm, "PPGTT setup failed (%ld)\n",
  PTR_ERR(ppgtt));
@@ -1465,7 +1465,7 @@ int i915_gem_vm_create_ioctl(struct drm_device 
*dev, void *data,

  if (args->flags)
  return -EINVAL;
  -    ppgtt = i915_ppgtt_create(>gt);
+    ppgtt = i915_ppgtt_create(>gt, 0);
  if (IS_ERR(ppgtt))
  return PTR_ERR(ppgtt);
  diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

index 118691ce81d7..fa2ba9e2a4d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -294,13 +294,16 @@ struct drm_i915_gem_object {
  #define I915_BO_ALLOC_USER    BIT(3)
  /* Object is allowed to lose its contents on suspend / resume, even 
if pinned */

  #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
+/* Object needs to be restored early using memcpy during resume */
+#define I915_BO_ALLOC_PM_EARLY    BIT(5)
  #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
   I915_BO_ALLOC_VOLATILE | \
   I915_BO_ALLOC_CPU_CLEAR | \
   I915_BO_ALLOC_USER | \
- I915_BO_ALLOC_PM_VOLATILE)
-#define I915_BO_READONLY  BIT(5)
-#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not 
release! */

+ I915_BO_ALLOC_PM_VOLATILE | \
+ I915_BO_ALLOC_PM_EARLY)
+#define I915_BO_READONLY  BIT(6)
+#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not 
release! */

    /**
   * @mem_flags - Mutable placement-related flags
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c

index 12b37b4c1192..726b40e1fbb0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -97,8 

Re: [PATCH v6 7/9] drm/i915: Reduce the number of objects subject to memcpy recover

2021-09-23 Thread Matthew Auld

On 22/09/2021 07:25, Thomas Hellström wrote:

We really only need memcpy restore for objects that affect the
operability of the migrate context. That is, primarily the page-table
objects of the migrate VM.

Add an object flag, I915_BO_ALLOC_PM_EARLY for objects that need early
restores using memcpy and a way to assign LMEM page-table object flags
to be used by the vms.

Restore objects without this flag with the gpu blitter and only objects
carrying the flag using TTM memcpy.

Initially mark the migrate, gt, gtt and vgpu vms to use this flag, and
defer for a later audit which vms actually need it. Most importantly, user-
allocated vms with pinned page-table objects can be restored using the
blitter.

Performance-wise memcpy restore is probably as fast as gpu restore if not
faster, but using gpu restore will help tackling future restrictions in
mappable LMEM size.

v4:
- Don't mark the aliasing ppgtt page table flags for early resume, but
   rather the ggtt page table flags as intended. (Matthew Auld)
- The check for user buffer objects during early resume is pointless, since
   they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld)
v5:
- Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them restored
   before we fire up the migrate context.

Cc: Matthew Brost 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Auld 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c  |  4 ++--
  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  9 ++---
  drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  6 +-
  drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c   |  5 +++--
  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  |  2 +-
  drivers/gpu/drm/i915/gt/gen6_ppgtt.c |  2 +-
  drivers/gpu/drm/i915/gt/gen8_ppgtt.c |  5 +++--
  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 +++-
  drivers/gpu/drm/i915/gt/intel_ggtt.c |  3 ++-
  drivers/gpu/drm/i915/gt/intel_gt.c   |  2 +-
  drivers/gpu/drm/i915/gt/intel_gtt.c  |  3 ++-
  drivers/gpu/drm/i915/gt/intel_gtt.h  |  9 +++--
  drivers/gpu/drm/i915/gt/intel_migrate.c  |  2 +-
  drivers/gpu/drm/i915/gt/intel_ppgtt.c| 13 -
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c |  2 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc.c   |  3 ++-
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c |  7 +--
  drivers/gpu/drm/i915/gvt/scheduler.c |  2 +-
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c|  4 ++--
  19 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index c2ab0e22db0a..8208fd5b72c3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1287,7 +1287,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
} else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
  
-		ppgtt = i915_ppgtt_create(>gt);

+   ppgtt = i915_ppgtt_create(>gt, 0);
if (IS_ERR(ppgtt)) {
drm_dbg(>drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
@@ -1465,7 +1465,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void 
*data,
if (args->flags)
return -EINVAL;
  
-	ppgtt = i915_ppgtt_create(>gt);

+   ppgtt = i915_ppgtt_create(>gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

index 118691ce81d7..fa2ba9e2a4d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -294,13 +294,16 @@ struct drm_i915_gem_object {
  #define I915_BO_ALLOC_USERBIT(3)
  /* Object is allowed to lose its contents on suspend / resume, even if pinned 
*/
  #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
+/* Object needs to be restored early using memcpy during resume */
+#define I915_BO_ALLOC_PM_EARLYBIT(5)
  #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
-I915_BO_ALLOC_PM_VOLATILE)
-#define I915_BO_READONLY  BIT(5)
-#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */
+I915_BO_ALLOC_PM_VOLATILE | \
+I915_BO_ALLOC_PM_EARLY)
+#define I915_BO_READONLY  BIT(6)
+#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
  
  	/**

 * @mem_flags - Mutable placement-related flags
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 12b37b4c1192..726b40e1fbb0 100644
---