On eviction, TTM requires that migration fences from the same region are
ordered using dma_fence_is_later(). For request-based fences we therefore
need to use the same context for the migration. However, now that we use a
dma_fence_work for error recovery and, in addition, might need to coalesce
the migration fence with async unbind fences, create a coalesce fence for
this.

Chain the coalesce fence on the migration fence and attach it to a region
timeline.

Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c    | 84 ++++++++++++++++++----
 drivers/gpu/drm/i915/intel_memory_region.c | 43 +++++++++++
 drivers/gpu/drm/i915/intel_memory_region.h |  7 ++
 3 files changed, 119 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 79d4d50aa4e5..625ce52e8662 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -672,9 +672,10 @@ static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear,
        }
 }
 
-static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
-                          struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
-                          struct i915_refct_sgt *dst_rsgt, bool allow_accel)
+static struct dma_fence *
+__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+               struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+               struct i915_refct_sgt *dst_rsgt, bool allow_accel)
 {
        struct i915_ttm_memcpy_work *copy_work;
        struct dma_fence *fence;
@@ -689,7 +690,7 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
                /* Don't fail with -ENOMEM. Move sync instead. */
                __i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt,
                                         allow_accel);
-               return 0;
+               return NULL;
        }
 
        dma_fence_work_init(&copy_work->base, &i915_ttm_memcpy_ops);
@@ -714,14 +715,45 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
        fence = dma_fence_get(&copy_work->base.dma);
        dma_fence_work_commit_imm(&copy_work->base);
 
-       /*
-        * We're synchronizing here for now. For async moves, return the
-        * fence.
-        */
-       dma_fence_wait(fence, false);
-       dma_fence_put(fence);
+       return fence;
+}
 
-       return ret;
+/**
+ * struct i915_coalesce_fence - A dma-fence used to coalesce multiple fences
+ * similar to struct dma_fence_array, and at the same time being timeline-
+ * attached.
+ * @base: struct dma_fence_work base.
+ * @cb: Callback for timeline attachment.
+ */
+struct i915_coalesce_fence {
+       struct dma_fence_work base;
+       struct i915_sw_dma_fence_cb cb;
+};
+
+/* No .work or .release callback. Just coalescing. */
+static const struct dma_fence_work_ops i915_coalesce_fence_ops = {
+       .name = "Coalesce fence",
+};
+
+static struct dma_fence *
+i915_ttm_coalesce_fence(struct dma_fence *fence, struct intel_memory_region 
*mr)
+{
+       struct i915_coalesce_fence *coalesce =
+               kmalloc(sizeof(*coalesce), GFP_KERNEL);
+
+       if (!coalesce) {
+               dma_fence_wait(fence, false);
+               dma_fence_put(fence);
+               return NULL;
+       }
+
+       dma_fence_work_init(&coalesce->base, &i915_coalesce_fence_ops);
+       dma_fence_work_chain(&coalesce->base, fence);
+       dma_fence_work_timeline_attach(&mr->tl, &coalesce->base, &coalesce->cb);
+       dma_fence_get(&coalesce->base.dma);
+       dma_fence_work_commit_imm(&coalesce->base);
+       dma_fence_put(fence);
+       return &coalesce->base.dma;
 }
 
 static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
@@ -734,6 +766,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
                ttm_manager_type(bo->bdev, dst_mem->mem_type);
        struct ttm_tt *ttm = bo->ttm;
        struct i915_refct_sgt *dst_rsgt;
+       struct dma_fence *fence = NULL;
        bool clear;
        int ret;
 
@@ -765,7 +798,23 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 
        clear = !cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
        if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
-               __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
+               fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, 
true);
+       if (fence && evict) {
+               struct intel_memory_region *mr =
+                       i915_ttm_region(bo->bdev, bo->resource->mem_type);
+
+               /*
+                * Attach to the region timeline, which is also required
+                * for future async unbind. Future async unbind fences
+                * can also be attached here.
+                */
+               fence = i915_ttm_coalesce_fence(fence, mr);
+       }
+
+       if (fence) {
+               dma_fence_wait(fence, false);
+               dma_fence_put(fence);
+       }
 
        ttm_bo_move_sync_cleanup(bo, dst_mem);
        i915_ttm_adjust_domains_after_move(obj);
@@ -1223,6 +1272,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
                .interruptible = intr,
        };
        struct i915_refct_sgt *dst_rsgt;
+       struct dma_fence *fence;
        int ret;
 
        assert_object_held(dst);
@@ -1238,10 +1288,14 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
                return ret;
 
        dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
-       __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
-                       dst_rsgt, allow_accel);
-
+       fence = __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
+                               dst_rsgt, allow_accel);
        i915_refct_sgt_put(dst_rsgt);
 
+       if (fence) {
+               dma_fence_wait(fence, false);
+               dma_fence_put(fence);
+       }
+
        return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index e7f7e6627750..aa1733e840f7 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -7,6 +7,9 @@
 #include "i915_drv.h"
 #include "i915_ttm_buddy_manager.h"
 
+static const struct dma_fence_work_timeline_ops tl_ops;
+static void intel_region_timeline_release_work(struct work_struct *work);
+
 static const struct {
        u16 class;
        u16 instance;
@@ -127,6 +130,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
        }
 
        kref_init(&mem->kref);
+
+       INIT_WORK(&mem->tl_put_work, intel_region_timeline_release_work);
+       dma_fence_work_timeline_init(&mem->tl, NULL, &tl_ops);
+
        return mem;
 
 err_free:
@@ -238,6 +245,42 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915)
        }
 }
 
+static void intel_region_timeline_get(struct dma_fence_work_timeline *tl)
+{
+       struct intel_memory_region *mr = container_of(tl, typeof(*mr), tl);
+
+       intel_memory_region_get(mr);
+}
+
+static void intel_region_timeline_release_work(struct work_struct *work)
+{
+       struct intel_memory_region *mr =
+               container_of(work, typeof(*mr), tl_put_work);
+
+       __intel_memory_region_destroy(&mr->kref);
+}
+
+static void intel_region_timeline_release(struct kref *ref)
+{
+       struct intel_memory_region *mr = container_of(ref, typeof(*mr), kref);
+
+       /* May be called from hardirq context, so queue the final release. */
+       queue_work(system_unbound_wq, &mr->tl_put_work);
+}
+
+static void intel_region_timeline_put(struct dma_fence_work_timeline *tl)
+{
+       struct intel_memory_region *mr = container_of(tl, typeof(*mr), tl);
+
+       kref_put(&mr->kref, intel_region_timeline_release);
+}
+
+static const struct dma_fence_work_timeline_ops tl_ops = {
+       .name = "Region timeline",
+       .get = intel_region_timeline_get,
+       .put = intel_region_timeline_put,
+};
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_memory_region.c"
 #include "selftests/mock_region.c"
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 3feae3353d33..928819e2edba 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -13,6 +13,8 @@
 #include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
 
+#include "i915_sw_fence_work.h"
+
 struct drm_i915_private;
 struct drm_i915_gem_object;
 struct drm_printer;
@@ -94,6 +96,11 @@ struct intel_memory_region {
        bool is_range_manager;
 
        void *region_private;
+
+       /** Timeline for TTM eviction fences */
+       struct dma_fence_work_timeline tl;
+       /** Work struct for _region_put() from atomic / irq context */
+       struct work_struct tl_put_work;
 };
 
 struct intel_memory_region *
-- 
2.31.1

Reply via email to