Signed-off-by: Qiang Yu <yuq...@gmail.com>
---
 drivers/gpu/drm/lima/lima_sched.c | 497 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/lima/lima_sched.h | 126 ++++++++
 2 files changed, 623 insertions(+)
 create mode 100644 drivers/gpu/drm/lima/lima_sched.c
 create mode 100644 drivers/gpu/drm/lima/lima_sched.h
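Note: the wrap-around handling in lima_sched_context_get_fence() below is compact and easy to misread, so here is a minimal, self-contained userspace sketch of the same wrap-safe window test. RING_SIZE and seq_in_window() are illustrative names standing in for lima_sched_max_tasks and the open-coded condition; the ring size must be a power of two, as the driver already assumes. This is a model of the logic, not driver code:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_SIZE 64u	/* stand-in for lima_sched_max_tasks (power of two) */

/* Return true if 'seq' is one of the last RING_SIZE sequence numbers
 * handed out, where 'sequence' is the next number to be assigned.
 * Mirrors the overflow check in lima_sched_context_get_fence(). */
static bool seq_in_window(uint32_t sequence, uint32_t seq)
{
	uint32_t max = sequence - 1;
	uint32_t min = sequence - RING_SIZE;

	if (min < max)		/* valid window does not cross 0 */
		return seq >= min && seq <= max;
	return seq >= min || seq <= max;	/* window wraps past UINT32_MAX */
}

int main(void)
{
	assert(seq_in_window(100, 99));			/* newest task */
	assert(seq_in_window(100, 36));			/* oldest still in the ring */
	assert(!seq_in_window(100, 35));		/* already overwritten */
	assert(seq_in_window(10, UINT32_MAX - 3));	/* wrapped window */
	assert(!seq_in_window(10, 500));		/* outside wrapped window */
	return 0;
}

Because the arithmetic is unsigned, min = sequence - RING_SIZE wraps predictably, so the only case split needed is whether the valid [min, max] window itself crosses the u32 boundary.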
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..190932955e9b
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2017-2018 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "lima_drv.h"
+#include "lima_sched.h"
+#include "lima_vm.h"
+#include "lima_mmu.h"
+#include "lima_l2_cache.h"
+
+struct lima_fence {
+	struct dma_fence base;
+	struct lima_sched_pipe *pipe;
+};
+
+static struct kmem_cache *lima_fence_slab;
+
+int lima_sched_slab_init(void)
+{
+	lima_fence_slab = kmem_cache_create(
+		"lima_fence", sizeof(struct lima_fence), 0,
+		SLAB_HWCACHE_ALIGN, NULL);
+	if (!lima_fence_slab)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void lima_sched_slab_fini(void)
+{
+	if (lima_fence_slab)
+		kmem_cache_destroy(lima_fence_slab);
+}
+
+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct lima_fence, base);
+}
+
+static const char *lima_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "lima";
+}
+
+static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct lima_fence *f = to_lima_fence(fence);
+
+	return f->pipe->base.name;
+}
+
+static bool lima_fence_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void lima_fence_release_rcu(struct rcu_head *rcu)
+{
+	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+	struct lima_fence *fence = to_lima_fence(f);
+
+	kmem_cache_free(lima_fence_slab, fence);
+}
+
+static void lima_fence_release(struct dma_fence *fence)
+{
+	struct lima_fence *f = to_lima_fence(fence);
+
+	call_rcu(&f->base.rcu, lima_fence_release_rcu);
+}
+
+static const struct dma_fence_ops lima_fence_ops = {
+	.get_driver_name = lima_fence_get_driver_name,
+	.get_timeline_name = lima_fence_get_timeline_name,
+	.enable_signaling = lima_fence_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = lima_fence_release,
+};
+
+static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
+{
+	struct lima_fence *fence;
+
+	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence->pipe = pipe;
+	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
+		       pipe->fence_context, ++pipe->fence_seqno);
+
+	return fence;
+}
+
+static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
+{
+	return container_of(job, struct lima_sched_task, base);
+}
+
+static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
+{
+	return container_of(sched, struct lima_sched_pipe, base);
+}
+
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_vm *vm)
+{
+	int err;
+
+	err = drm_sched_job_init(&task->base, context->base.sched,
+				 &context->base, context);
+	if (err)
+		return err;
+
+	task->vm = lima_vm_get(vm);
+	return 0;
+}
+
+void lima_sched_task_fini(struct lima_sched_task *task)
+{
+	dma_fence_put(&task->base.s_fence->finished);
+	lima_vm_put(task->vm);
+}
+
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
+{
+	int i, new_dep = 4;
+
+	if (task->dep && task->num_dep == task->max_dep)
+		new_dep = task->max_dep * 2;
+
+	if (task->max_dep < new_dep) {
+		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
+		if (!dep)
+			return -ENOMEM;
+		task->max_dep = new_dep;
+		task->dep = dep;
+	}
+
+	dma_fence_get(fence);
+	for (i = 0; i < task->num_dep; i++) {
+		if (task->dep[i]->context == fence->context &&
+		    dma_fence_is_later(fence, task->dep[i])) {
+			dma_fence_put(task->dep[i]);
+			task->dep[i] = fence;
+			return 0;
+		}
+	}
+
+	task->dep[task->num_dep++] = fence;
+	return 0;
+}
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty)
+{
+	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
+	int err;
+
+	context->fences =
+		kzalloc(sizeof(*context->fences) * lima_sched_max_tasks, GFP_KERNEL);
+	if (!context->fences)
+		return -ENOMEM;
+
+	mutex_init(&context->lock);
+	err = drm_sched_entity_init(&pipe->base, &context->base, rq,
+				    lima_sched_max_tasks, guilty);
+	if (err) {
+		kfree(context->fences);
+		context->fences = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context)
+{
+	drm_sched_entity_fini(&pipe->base, &context->base);
+
+	mutex_destroy(&context->lock);
+
+	if (context->fences)
+		kfree(context->fences);
+}
+
+static uint32_t lima_sched_context_add_fence(struct lima_sched_context *context,
+					     struct dma_fence *fence,
+					     uint32_t *done)
+{
+	uint32_t seq, idx, i;
+	struct dma_fence *other;
+
+	mutex_lock(&context->lock);
+
+	seq = context->sequence;
+	idx = seq & (lima_sched_max_tasks - 1);
+	other = context->fences[idx];
+
+	if (other) {
+		int err = dma_fence_wait(other, false);
+		if (err)
+			DRM_ERROR("Error %d waiting on context fence\n", err);
+	}
+
+	context->fences[idx] = dma_fence_get(fence);
+	context->sequence++;
+
+	/* get finished fence offset from seq */
+	for (i = 1; i < lima_sched_max_tasks; i++) {
+		idx = (seq - i) & (lima_sched_max_tasks - 1);
+		if (!context->fences[idx] ||
+		    dma_fence_is_signaled(context->fences[idx]))
+			break;
+	}
+
+	mutex_unlock(&context->lock);
+
+	dma_fence_put(other);
+
+	*done = i;
+	return seq;
+}
+
+struct dma_fence *lima_sched_context_get_fence(
+	struct lima_sched_context *context, uint32_t seq)
+{
+	struct dma_fence *fence;
+	int idx;
+	uint32_t max, min;
+
+	mutex_lock(&context->lock);
+
+	max = context->sequence - 1;
+	min = context->sequence - lima_sched_max_tasks;
+
+	/* handle overflow case */
+	if ((min < max && (seq < min || seq > max)) ||
+	    (min > max && (seq < min && seq > max))) {
+		fence = NULL;
+		goto out;
+	}
+
+	idx = seq & (lima_sched_max_tasks - 1);
+	fence = dma_fence_get(context->fences[idx]);
+
+out:
+	mutex_unlock(&context->lock);
+
+	return fence;
+}
+
+uint32_t lima_sched_context_queue_task(struct lima_sched_context *context,
+				       struct lima_sched_task *task,
+				       uint32_t *done)
+{
+	uint32_t seq = lima_sched_context_add_fence(
+		context, &task->base.s_fence->finished, done);
+	drm_sched_entity_push_job(&task->base, &context->base);
+	return seq;
+}
+
+static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
+					       struct drm_sched_entity *entity)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	int i;
+
+	for (i = 0; i < task->num_dep; i++) {
+		struct dma_fence *fence = task->dep[i];
+
+		if (!task->dep[i])
+			continue;
+
+		task->dep[i] = NULL;
+
+		if (!dma_fence_is_signaled(fence))
+			return fence;
+
+		dma_fence_put(fence);
+	}
+
+	return NULL;
+}
+
+static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_fence *fence;
+	struct dma_fence *ret;
+	struct lima_vm *vm = NULL, *last_vm = NULL;
+	int i;
+
+	/* after GPU reset */
+	if (job->s_fence->finished.error < 0)
+		return NULL;
+
+	fence = lima_fence_create(pipe);
+	if (!fence)
+		return NULL;
+	task->fence = &fence->base;
+
+	/* take a reference for the caller, otherwise the IRQ handler
+	 * may consume the fence before the caller uses it */
+	ret = dma_fence_get(task->fence);
+
+	pipe->current_task = task;
+
+	/* this is needed for the MMU to work correctly, otherwise GP/PP
+	 * will hang or page fault for unknown reasons after running for
+	 * a while.
+	 *
+	 * Need to investigate:
+	 * 1. is it related to the TLB?
+	 * 2. how much performance will be affected by the L2 cache flush?
+	 * 3. can we reduce the calls to this function, given that all
+	 *    GP/PP share the same L2 cache on Mali400?
+	 *
+	 * TODO:
+	 * 1. move this to task fini to save some wait time?
+	 * 2. when GP/PP use different L2 caches, does PP need to wait
+	 *    for the GP L2 cache flush?
+	 */
+	for (i = 0; i < pipe->num_l2_cache; i++)
+		lima_l2_cache_flush(pipe->l2_cache[i]);
+
+	if (task->vm != pipe->current_vm) {
+		vm = lima_vm_get(task->vm);
+		last_vm = pipe->current_vm;
+		pipe->current_vm = task->vm;
+	}
+
+	if (pipe->bcast_mmu)
+		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
+	else {
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_switch_vm(pipe->mmu[i], vm);
+	}
+
+	if (last_vm)
+		lima_vm_put(last_vm);
+
+	pipe->error = false;
+	pipe->task_run(pipe, task);
+
+	return task->fence;
+}
+
+static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
+					 struct lima_sched_task *task)
+{
+	kthread_park(pipe->base.thread);
+	drm_sched_hw_job_reset(&pipe->base, &task->base);
+
+	pipe->task_error(pipe);
+
+	if (pipe->bcast_mmu)
+		lima_mmu_page_fault_resume(pipe->bcast_mmu);
+	else {
+		int i;
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_page_fault_resume(pipe->mmu[i]);
+	}
+
+	if (pipe->current_vm)
+		lima_vm_put(pipe->current_vm);
+
+	pipe->current_vm = NULL;
+	pipe->current_task = NULL;
+
+	drm_sched_job_recovery(&pipe->base);
+	kthread_unpark(pipe->base.thread);
+}
+
+static void lima_sched_timedout_job(struct drm_sched_job *job)
+{
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_sched_task *task = to_lima_task(job);
+
+	lima_sched_handle_error_task(pipe, task);
+}
+
+static void lima_sched_free_job(struct drm_sched_job *job)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	int i;
+
+	dma_fence_put(task->fence);
+
+	for (i = 0; i < task->num_dep; i++) {
+		if (task->dep[i])
+			dma_fence_put(task->dep[i]);
+	}
+
+	if (task->dep)
+		kfree(task->dep);
+
+	lima_vm_put(task->vm);
+	kmem_cache_free(pipe->task_slab, task);
+}
+
+const struct drm_sched_backend_ops lima_sched_ops = {
+	.dependency = lima_sched_dependency,
+	.run_job = lima_sched_run_job,
+	.timedout_job = lima_sched_timedout_job,
+	.free_job = lima_sched_free_job,
+};
+
+static void lima_sched_error_work(struct work_struct *work)
+{
+	struct lima_sched_pipe *pipe =
+		container_of(work, struct lima_sched_pipe, error_work);
+	struct lima_sched_task *task = pipe->current_task;
+
+	lima_sched_handle_error_task(pipe, task);
+}
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
+{
+	long timeout;
+
+	if (lima_sched_timeout_ms <= 0)
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	else
+		timeout = msecs_to_jiffies(lima_sched_timeout_ms);
+
+	pipe->fence_context = dma_fence_context_alloc(1);
+	spin_lock_init(&pipe->fence_lock);
+
+	INIT_WORK(&pipe->error_work, lima_sched_error_work);
+
+	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
+}
+
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
+{
+	drm_sched_fini(&pipe->base);
+}
+
+unsigned long lima_timeout_to_jiffies(u64 timeout_ns)
+{
+	unsigned long timeout_jiffies;
+	ktime_t timeout;
+
+	/* clamp timeout if it's too large */
+	if (((s64)timeout_ns) < 0)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
+	if (ktime_to_ns(timeout) < 0)
+		return 0;
+
+	timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
+	/* clamp timeout to avoid unsigned -> signed overflow */
+	if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	return timeout_jiffies;
+}
+
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
+{
+	if (pipe->error)
+		schedule_work(&pipe->error_work);
+	else {
+		struct lima_sched_task *task = pipe->current_task;
+
+		pipe->task_fini(pipe);
+		dma_fence_signal(task->fence);
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
new file mode 100644
index 000000000000..b93b7b4eded4
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017-2018 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __LIMA_SCHED_H__
+#define __LIMA_SCHED_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct lima_vm;
+
+struct lima_sched_task {
+	struct drm_sched_job base;
+
+	struct lima_vm *vm;
+	void *frame;
+
+	struct dma_fence **dep;
+	int num_dep;
+	int max_dep;
+
+	/* pipe fence */
+	struct dma_fence *fence;
+};
+
+struct lima_sched_context {
+	struct drm_sched_entity base;
+	struct mutex lock;
+	struct dma_fence **fences;
+	uint32_t sequence;
+};
+
+#define LIMA_SCHED_PIPE_MAX_MMU 8
+#define LIMA_SCHED_PIPE_MAX_L2_CACHE 2
+#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
+
+struct lima_ip;
+
+struct lima_sched_pipe {
+	struct drm_gpu_scheduler base;
+
+	u64 fence_context;
+	u32 fence_seqno;
+	spinlock_t fence_lock;
+
+	struct lima_sched_task *current_task;
+	struct lima_vm *current_vm;
+
+	struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
+	int num_mmu;
+
+	struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
+	int num_l2_cache;
+
+	struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
+	int num_processor;
+
+	struct lima_ip *bcast_processor;
+	struct lima_ip *bcast_mmu;
+
+	u32 done;
+	bool error;
+	atomic_t task;
+
+	int frame_size;
+	struct kmem_cache *task_slab;
+
+	int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_fini)(struct lima_sched_pipe *pipe);
+	void (*task_error)(struct lima_sched_pipe *pipe);
+	void (*task_mmu_error)(struct lima_sched_pipe *pipe);
+
+	struct work_struct error_work;
+};
+
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_vm *vm);
+void lima_sched_task_fini(struct lima_sched_task *task);
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty);
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context);
+uint32_t lima_sched_context_queue_task(struct lima_sched_context *context,
+				       struct lima_sched_task *task,
+				       uint32_t *done);
+struct dma_fence *lima_sched_context_get_fence(
+	struct lima_sched_context *context, uint32_t seq);
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
+
+static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
+{
+	pipe->error = true;
+	pipe->task_mmu_error(pipe);
+}
+
+int lima_sched_slab_init(void);
+void lima_sched_slab_fini(void);
+
+unsigned long lima_timeout_to_jiffies(u64 timeout_ns);
+
+#endif
-- 
2.17.0
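For reference, the per-context dependency de-duplication in lima_sched_task_add_dep() above can be modelled in plain userspace C. struct fence, deps_add() and the field names here are hypothetical stand-ins for struct dma_fence and the task's dep array, and the dma_fence_get()/dma_fence_put() refcounting is omitted; the growth policy (start at 4 slots, double when full) and the keep-only-the-latest-fence-per-context rule follow the patch:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fence {			/* hypothetical stand-in for struct dma_fence */
	uint64_t context;
	uint64_t seqno;
};

struct deps {
	struct fence **dep;
	int num_dep;
	int max_dep;
};

static int deps_add(struct deps *d, struct fence *fence)
{
	int i, new_max = 4;

	/* grow the array geometrically, starting at 4 entries */
	if (d->dep && d->num_dep == d->max_dep)
		new_max = d->max_dep * 2;

	if (d->max_dep < new_max) {
		void *dep = realloc(d->dep, sizeof(*d->dep) * new_max);
		if (!dep)
			return -1;
		d->max_dep = new_max;
		d->dep = dep;
	}

	/* a later fence on the same context supersedes the stored one */
	for (i = 0; i < d->num_dep; i++) {
		if (d->dep[i]->context == fence->context &&
		    fence->seqno > d->dep[i]->seqno) {
			d->dep[i] = fence;
			return 0;
		}
	}

	d->dep[d->num_dep++] = fence;
	return 0;
}

int main(void)
{
	struct deps d = { 0 };
	struct fence a = { .context = 1, .seqno = 5 };
	struct fence b = { .context = 1, .seqno = 9 };

	deps_add(&d, &a);
	deps_add(&d, &b);	/* replaces 'a': same context, later seqno */
	printf("deps=%d seqno=%llu\n", d.num_dep,
	       (unsigned long long)d.dep[0]->seqno);	/* deps=1 seqno=9 */
	free(d.dep);
	return 0;
}

Keeping at most one fence per context is safe because fences on a single dma_fence context signal in order, so waiting on the latest fence implies all earlier ones on that timeline have signaled.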