If set, force the allocation to be placed in the mappable portion of
LMEM. One big restriction here is that system memory must be given as a
potential placement for the object, that way we can always spill the
object into system memory if we can't make space.

XXX: Still very much WIP and needs IGTs. Including now just for the sake
of having more complete picture.

Signed-off-by: Matthew Auld <matthew.a...@intel.com>
Cc: Thomas Hellström <thomas.hellst...@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++-------
 include/uapi/drm/i915_drm.h                | 31 +++++++++++++++++++++-
 2 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index cc9ddb943f96..2737977f81a4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -238,6 +238,7 @@ struct create_ext {
        struct drm_i915_private *i915;
        struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
        unsigned int n_placements;
+       unsigned int placement_mask;
        unsigned long flags;
 };
 
@@ -334,6 +335,7 @@ static int set_placements(struct 
drm_i915_gem_create_ext_memory_regions *args,
        for (i = 0; i < args->num_regions; i++)
                ext_data->placements[i] = placements[i];
 
+       ext_data->placement_mask = mask;
        return 0;
 
 out_dump:
@@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
        struct drm_i915_gem_object *obj;
        int ret;
 
-       if (args->flags)
+       if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
                return -EINVAL;
 
        ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
@@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
                ext_data.n_placements = 1;
        }
 
-       /*
-        * TODO: add a userspace hint to force CPU_ACCESS for the object, which
-        * can override this.
-        */
-       if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
-                             ext_data.placements[0]->type !=
-                             INTEL_MEMORY_SYSTEM))
-               ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+       if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
+               if (ext_data.n_placements == 1)
+                       return -EINVAL;
+
+               /*
+                * We always need to be able to spill to system memory, if we
+                * can't place in the mappable part of LMEM.
+                */
+               if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
+                       return -EINVAL;
+       } else {
+               if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
+                                     ext_data.placements[0]->type !=
+                                     INTEL_MEMORY_SYSTEM))
+                       ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+       }
 
        obj = __i915_gem_object_create_user_ext(i915, args->size,
                                                ext_data.placements,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 914ebd9290e5..925685bd261e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
         * Object handles are nonzero.
         */
        __u32 handle;
-       /** @flags: MBZ */
+       /**
+        * @flags: Optional flags.
+        *
+        * Supported values:
+        *
+        * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+        * the object will need to be accessed via the CPU.
+        *
+        * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+        * only strictly required on platforms where only some of the device
+        * memory is directly visible or mappable through the CPU, like on DG2+.
+        *
+        * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+        * ensure we can always spill the allocation to system memory, if we
+        * can't place the object in the mappable part of
+        * I915_MEMORY_CLASS_DEVICE.
+        *
+        * Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
+        * will need to enable this hint, if the object can also be placed in
+        * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
+        * throw an error otherwise. This also means that such objects will need
+        * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
+        *
+        * Without this hint, the kernel will assume that non-mappable
+        * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+        * kernel can still migrate the object to the mappable part, as a last
+        * resort, if userspace ever CPU faults this object, but this might be
+        * expensive, and so ideally should be avoided.
+        */
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
        __u32 flags;
        /**
         * @extensions: The chain of extensions to apply to this object.
-- 
2.34.1

Reply via email to