From: Christian König <christian.koe...@amd.com>

This patch allows concurrent access by different engines to the same BO
as long as everybody only reads from it. Since TTM can't (yet) handle
multiple fences for one BO, we still sync to the fence after executing the IB.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/radeon/radeon.h     |  2 ++
 drivers/gpu/drm/radeon/radeon_cs.c  | 24 +++++++++++++++++++++++-
 drivers/gpu/drm/radeon/radeon_ttm.c |  8 ++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)
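
To make the decision above concrete, here is a minimal illustrative sketch
(not driver code): the bo_state/choose_sync_point names are made up for the
example, while ib.presync/ib.postsync refer to the semaphores this series
adds, where presync waits on the fence before the IB runs and postsync only
waits after it.

#include <stdbool.h>

struct bo_state {
	bool has_pending_write;	/* the last CS (or a BO move) wrote to it */
};

enum sync_point {
	SYNC_BEFORE_IB,	/* serialize: wait on the fence before the IB runs */
	SYNC_AFTER_IB,	/* read-read: engines may overlap; we only sync
			 * afterwards because TTM tracks one fence per BO */
};

static enum sync_point choose_sync_point(const struct bo_state *bo,
					 bool cs_writes_bo)
{
	/* a write on either side forces serialization before execution */
	if (cs_writes_bo || bo->has_pending_write)
		return SYNC_BEFORE_IB;

	return SYNC_AFTER_IB;
}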

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4579361..c0f7773 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -478,6 +478,7 @@ struct radeon_bo {
        u32                             tiling_flags;
        u32                             pitch;
        int                             surface_reg;
+       struct radeon_fence             *written;
        /* list of all virtual address to which this bo
         * is associated to
         */
@@ -1017,6 +1018,7 @@ struct radeon_cs_reloc {
        unsigned                        allowed_domains;
        uint32_t                        tiling_flags;
        uint32_t                        handle;
+       bool                            written;
 };

 struct radeon_cs_chunk {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 2be4fc5..3aa7e48 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -166,6 +166,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;
+               p->relocs[i].written = !!r->write_domain;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
@@ -236,7 +237,16 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
                        continue;

                fence = bo->tbo.sync_obj;
-               radeon_semaphore_sync_to(p->ib.presync, fence);
+
+               if (bo->written && radeon_fence_signaled(bo->written))
+                       radeon_fence_unref(&bo->written);
+
+               /* if either this CS or the last one writes to
+                * the BO, we sync before executing the IB */
+               if (reloc->written || bo->written)
+                       radeon_semaphore_sync_to(p->ib.presync, fence);
+               else
+                       radeon_semaphore_sync_to(p->ib.postsync, fence);
        }
 }

@@ -406,6 +416,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

+               /* remember which BOs we write to */
+               for (i = 0; i < parser->nrelocs; i++) {
+                       struct radeon_cs_reloc *reloc = &parser->relocs[i];
+                       struct radeon_bo *bo = reloc->robj;
+
+                       if (!bo || !reloc->written)
+                               continue;
+
+                       radeon_fence_unref(&bo->written);
+                       bo->written = radeon_fence_ref(parser->ib.fence);
+               }
+
                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->ib.fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 72afe82..76be612 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -228,10 +228,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
        struct radeon_device *rdev;
        uint64_t old_start, new_start;
        struct radeon_fence *fence;
+       struct radeon_bo *rbo;
        int r, ridx;

        rdev = radeon_get_rdev(bo->bdev);
        ridx = radeon_copy_ring_index(rdev);
+       rbo = container_of(bo, struct radeon_bo, tbo);
        old_start = old_mem->start << PAGE_SHIFT;
        new_start = new_mem->start << PAGE_SHIFT;

@@ -269,6 +271,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
        r = radeon_copy(rdev, old_start, new_start,
                        new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
                        &fence);
+
+       if (!r) {
+               radeon_fence_unref(&rbo->written);
+               rbo->written = radeon_fence_ref(fence);
+       }
+
        /* FIXME: handle copy error */
        r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
                                      evict, no_wait_gpu, new_mem);
-- 
1.9.1
