From: Rob Clark <robdcl...@chromium.org>

Avoid allocating memory in job_run() by embedding the fence in the
submit object.  Since msm gpu fences are always 1:1 with msm_gem_submit
we can just use the fence's refcnt to track the submit.  And since we
can get the fence ctx from the submit we can just drop the msm_fence
struct altogether.  This uses the new dma_fence_init_noref() to deal
with the fact that the fence's refcnt is initialized when the submit is
created, long before job_run().

Signed-off-by: Rob Clark <robdcl...@chromium.org>
---
 drivers/gpu/drm/msm/msm_fence.c      | 43 +++++++++++-----------------
 drivers/gpu/drm/msm/msm_fence.h      |  2 +-
 drivers/gpu/drm/msm/msm_gem.h        | 10 +++----
 drivers/gpu/drm/msm/msm_gem_submit.c |  8 ++----
 drivers/gpu/drm/msm/msm_gpu.c        |  4 +--
 drivers/gpu/drm/msm/msm_ringbuffer.c |  4 +--
 6 files changed, 30 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 56641408ea74..3a56e32abc3b 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -8,7 +8,7 @@
 
 #include "msm_drv.h"
 #include "msm_fence.h"
-
+#include "msm_gpu.h"
 
 struct msm_fence_context *
 msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
@@ -65,14 +65,9 @@ void msm_update_fence(struct msm_fence_context *fctx, 
uint32_t fence)
        spin_unlock_irqrestore(&fctx->spinlock, flags);
 }
 
-struct msm_fence {
-       struct dma_fence base;
-       struct msm_fence_context *fctx;
-};
-
-static inline struct msm_fence *to_msm_fence(struct dma_fence *fence)
+static inline struct msm_gem_submit *fence_to_submit(struct dma_fence *fence)
 {
-       return container_of(fence, struct msm_fence, base);
+       return container_of(fence, struct msm_gem_submit, hw_fence);
 }
 
 static const char *msm_fence_get_driver_name(struct dma_fence *fence)
@@ -82,35 +77,31 @@ static const char *msm_fence_get_driver_name(struct 
dma_fence *fence)
 
 static const char *msm_fence_get_timeline_name(struct dma_fence *fence)
 {
-       struct msm_fence *f = to_msm_fence(fence);
-       return f->fctx->name;
+       struct msm_gem_submit *submit = fence_to_submit(fence);
+       return submit->ring->fctx->name;
 }
 
 static bool msm_fence_signaled(struct dma_fence *fence)
 {
-       struct msm_fence *f = to_msm_fence(fence);
-       return msm_fence_completed(f->fctx, f->base.seqno);
+       struct msm_gem_submit *submit = fence_to_submit(fence);
+       return msm_fence_completed(submit->ring->fctx, fence->seqno);
+}
+
+static void msm_fence_release(struct dma_fence *fence)
+{
+       __msm_gem_submit_destroy(fence_to_submit(fence));
 }
 
 static const struct dma_fence_ops msm_fence_ops = {
        .get_driver_name = msm_fence_get_driver_name,
        .get_timeline_name = msm_fence_get_timeline_name,
        .signaled = msm_fence_signaled,
+       .release = msm_fence_release,
 };
 
-struct dma_fence *
-msm_fence_alloc(struct msm_fence_context *fctx)
+void
+msm_fence_init(struct msm_fence_context *fctx, struct dma_fence *f)
 {
-       struct msm_fence *f;
-
-       f = kzalloc(sizeof(*f), GFP_KERNEL);
-       if (!f)
-               return ERR_PTR(-ENOMEM);
-
-       f->fctx = fctx;
-
-       dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock,
-                      fctx->context, ++fctx->last_fence);
-
-       return &f->base;
+       dma_fence_init_noref(f, &msm_fence_ops, &fctx->spinlock,
+                            fctx->context, ++fctx->last_fence);
 }
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index 7f1798c54cd1..3c8c55398e9b 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -61,7 +61,7 @@ void msm_fence_context_free(struct msm_fence_context *fctx);
 bool msm_fence_completed(struct msm_fence_context *fctx, uint32_t fence);
 void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence);
 
-struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx);
+void msm_fence_init(struct msm_fence_context *fctx, struct dma_fence *f);
 
 static inline bool
 fence_before(uint32_t a, uint32_t b)
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index c4844cf3a585..e06afed99d5b 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -259,10 +259,10 @@ struct msm_gem_submit {
        struct ww_acquire_ctx ticket;
        uint32_t seqno;         /* Sequence number of the submit on the ring */
 
-       /* Hw fence, which is created when the scheduler executes the job, and
+       /* Hw fence, which is initialized when the scheduler executes the job, 
and
         * is signaled when the hw finishes (via seqno write from cmdstream)
         */
-       struct dma_fence *hw_fence;
+       struct dma_fence hw_fence;
 
        /* Userspace visible fence, which is signaled by the scheduler after
         * the hw_fence is signaled.
@@ -309,16 +309,16 @@ static inline struct msm_gem_submit *to_msm_submit(struct 
drm_sched_job *job)
        return container_of(job, struct msm_gem_submit, base);
 }
 
-void __msm_gem_submit_destroy(struct kref *kref);
+void __msm_gem_submit_destroy(struct msm_gem_submit *submit);
 
 static inline void msm_gem_submit_get(struct msm_gem_submit *submit)
 {
-       kref_get(&submit->ref);
+       dma_fence_get(&submit->hw_fence);
 }
 
 static inline void msm_gem_submit_put(struct msm_gem_submit *submit)
 {
-       kref_put(&submit->ref, __msm_gem_submit_destroy);
+       dma_fence_put(&submit->hw_fence);
 }
 
 void msm_submit_retire(struct msm_gem_submit *submit);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
b/drivers/gpu/drm/msm/msm_gem_submit.c
index be4bf77103cd..522c8c82e827 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -47,7 +47,7 @@ static struct msm_gem_submit *submit_create(struct drm_device 
*dev,
                return ERR_PTR(ret);
        }
 
-       kref_init(&submit->ref);
+       kref_init(&submit->hw_fence.refcount);
        submit->dev = dev;
        submit->aspace = queue->ctx->aspace;
        submit->gpu = gpu;
@@ -65,10 +65,9 @@ static struct msm_gem_submit *submit_create(struct 
drm_device *dev,
        return submit;
 }
 
-void __msm_gem_submit_destroy(struct kref *kref)
+/* Called when the hw_fence is destroyed: */
+void __msm_gem_submit_destroy(struct msm_gem_submit *submit)
 {
-       struct msm_gem_submit *submit =
-                       container_of(kref, struct msm_gem_submit, ref);
        unsigned i;
 
        if (submit->fence_id) {
@@ -78,7 +77,6 @@ void __msm_gem_submit_destroy(struct kref *kref)
        }
 
        dma_fence_put(submit->user_fence);
-       dma_fence_put(submit->hw_fence);
 
        put_pid(submit->pid);
        msm_submitqueue_put(submit->queue);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 380249500325..a82d11dd5fcf 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -716,7 +716,7 @@ static void retire_submits(struct msm_gpu *gpu)
                         * been signalled, then later submits are not signalled
                         * either, so we are also done.
                         */
-                       if (submit && dma_fence_is_signaled(submit->hw_fence)) {
+                       if (submit && dma_fence_is_signaled(&submit->hw_fence)) 
{
                                retire_submit(gpu, ring, submit);
                        } else {
                                break;
@@ -760,7 +760,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit)
 
        msm_gpu_hw_init(gpu);
 
-       submit->seqno = submit->hw_fence->seqno;
+       submit->seqno = submit->hw_fence.seqno;
 
        msm_rd_dump_submit(priv->rd, submit, NULL);
 
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c 
b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 57a8e9564540..5c54befa2427 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -18,7 +18,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job 
*job)
        struct msm_gpu *gpu = submit->gpu;
        int i;
 
-       submit->hw_fence = msm_fence_alloc(fctx);
+       msm_fence_init(fctx, &submit->hw_fence);
 
        for (i = 0; i < submit->nr_bos; i++) {
                struct drm_gem_object *obj = &submit->bos[i].obj->base;
@@ -37,7 +37,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job 
*job)
 
        mutex_unlock(&gpu->lock);
 
-       return dma_fence_get(submit->hw_fence);
+       return dma_fence_get(&submit->hw_fence);
 }
 
 static void msm_job_free(struct drm_sched_job *job)
-- 
2.39.2

Reply via email to