To implement fair scheduling we need a view into the GPU time consumed by entities. The problem is that job and entity objects have decoupled lifetimes: by the time the accurate GPU time is known, we can no longer link the job back to its entity.
Solve this by adding a lightweight entity stats object which is reference counted by both the entity and the job, and hence can safely be used from either side.

With that in place, the only other thing we need is a helper which adds the job's GPU time to the respective entity stats object, called once the accurate GPU time has been calculated. The most convenient place to do that is the free job worker, for several reasons.

Doing the accounting from the job completion callback would mean a few locks would need to become IRQ-safe, and we would also need to worry about out-of-order completions (via dma_fence_is_signaled calls which we cannot control). In-order completion is critical for GPU time accuracy, which is currently adjusted per fence in the free worker and requires looking at the next job in the scheduler pending list. We would also need to add a new lock to protect the scheduler average stats update.

In contrast to those complications, accounting from the free worker is serialized by definition and all of the above is avoided.

The downside is a potential time lag between job completion and the GPU time being accounted against the entity. Since that is partly alleviated by batch processing of the completed job queue, and since the scheduling algorithm does not attempt to be perfectly fair (which would in any case be close to impossible with the current DRM scheduler design and hardware with no or poor preemption support), this downside is not considered critical. In practice the scheduler is affected by worker scheduling delays from other angles too, not least in its ability to promptly feed the GPU with new work. We therefore choose the simple option and can consider improving upon it later if the need arises.

Signed-off-by: Tvrtko Ursulin <[email protected]>
Cc: Christian König <[email protected]>
Cc: Danilo Krummrich <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Philipp Stanner <[email protected]>
---
 drivers/gpu/drm/scheduler/sched_entity.c   | 58 ++++++++++++++++++++++
 drivers/gpu/drm/scheduler/sched_internal.h | 48 ++++++++++++++++++
 drivers/gpu/drm/scheduler/sched_main.c     |  6 ++-
 include/drm/gpu_scheduler.h                | 12 +++++
 4 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 768f11510129..591cb9f82608 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -32,6 +32,58 @@
 
 #include "gpu_scheduler_trace.h"
 
+
+/**
+ * drm_sched_entity_stats_release - Entity stats kref release function
+ * @kref: Entity stats embedded kref pointer
+ */
+void drm_sched_entity_stats_release(struct kref *kref)
+{
+	struct drm_sched_entity_stats *stats =
+		container_of(kref, typeof(*stats), kref);
+
+	kfree(stats);
+}
+
+/**
+ * drm_sched_entity_stats_new - Allocate a new struct drm_sched_entity_stats object
+ *
+ * Return: Pointer to newly allocated struct drm_sched_entity_stats object.
+ */
+static struct drm_sched_entity_stats *drm_sched_entity_stats_new(void)
+{
+	struct drm_sched_entity_stats *stats;
+
+	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return NULL;
+
+	kref_init(&stats->kref);
+	spin_lock_init(&stats->lock);
+
+	return stats;
+}
+
+/**
+ * drm_sched_entity_stats_job_add_gpu_time - Account job execution time to entity
+ * @job: Scheduler job to account.
+ *
+ * Accounts the execution time of @job to its respective entity stats object.
+ */
+void drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job)
+{
+	struct drm_sched_entity_stats *stats = job->entity_stats;
+	struct drm_sched_fence *s_fence = job->s_fence;
+	ktime_t start, end;
+
+	start = dma_fence_timestamp(&s_fence->scheduled);
+	end = dma_fence_timestamp(&s_fence->finished);
+
+	spin_lock(&stats->lock);
+	stats->runtime = ktime_add(stats->runtime, ktime_sub(end, start));
+	spin_unlock(&stats->lock);
+}
+
 /**
  * drm_sched_entity_init - Init a context entity used by scheduler when
  * submit to HW ring.
@@ -65,6 +117,11 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 		return -EINVAL;
 
 	memset(entity, 0, sizeof(struct drm_sched_entity));
+
+	entity->stats = drm_sched_entity_stats_new();
+	if (!entity->stats)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&entity->list);
 	entity->rq = NULL;
 	entity->guilty = guilty;
@@ -338,6 +395,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 
 	dma_fence_put(rcu_dereference_check(entity->last_scheduled, true));
 	RCU_INIT_POINTER(entity->last_scheduled, NULL);
+	drm_sched_entity_stats_put(entity->stats);
 }
 EXPORT_SYMBOL(drm_sched_entity_fini);
 
diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h
index b683cf813469..be5d99c641c5 100644
--- a/drivers/gpu/drm/scheduler/sched_internal.h
+++ b/drivers/gpu/drm/scheduler/sched_internal.h
@@ -3,6 +3,26 @@
 #ifndef _DRM_GPU_SCHEDULER_INTERNAL_H_
 #define _DRM_GPU_SCHEDULER_INTERNAL_H_
 
+#include <linux/ktime.h>
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+
+/**
+ * struct drm_sched_entity_stats - execution stats for an entity.
+ * @kref: reference count for the object.
+ * @lock: lock guarding the @runtime updates.
+ * @runtime: time entity spent on the GPU.
+ *
+ * Because jobs and entities have decoupled lifetimes, i.e. we cannot access
+ * the entity once the job has been de-queued, and we do need to know how much
+ * GPU time each entity has spent, we need to track this in a separate object
+ * which is reference counted by both entities and jobs.
+ */
+struct drm_sched_entity_stats {
+	struct kref	kref;
+	spinlock_t	lock;
+	ktime_t		runtime;
+};
 
 /* Used to choose between FIFO and RR job-scheduling */
 extern int drm_sched_policy;
@@ -95,4 +115,32 @@ drm_sched_entity_is_ready(struct drm_sched_entity *entity)
 	return true;
 }
 
+void drm_sched_entity_stats_release(struct kref *kref);
+
+/**
+ * drm_sched_entity_stats_get - Obtain a reference count on &struct drm_sched_entity_stats object
+ * @stats: struct drm_sched_entity_stats pointer
+ *
+ * Return: struct drm_sched_entity_stats pointer
+ */
+static inline struct drm_sched_entity_stats *
+drm_sched_entity_stats_get(struct drm_sched_entity_stats *stats)
+{
+	kref_get(&stats->kref);
+
+	return stats;
+}
+
+/**
+ * drm_sched_entity_stats_put - Release a reference count on &struct drm_sched_entity_stats object
+ * @stats: struct drm_sched_entity_stats pointer
+ */
+static inline void
+drm_sched_entity_stats_put(struct drm_sched_entity_stats *stats)
+{
+	kref_put(&stats->kref, drm_sched_entity_stats_release);
+}
+
+void drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job);
+
 #endif
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index f825ad9e2260..4c10c7ba6704 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -660,6 +660,7 @@ void drm_sched_job_arm(struct drm_sched_job *job)
 
 	job->sched = sched;
 	job->s_priority = entity->priority;
+	job->entity_stats = drm_sched_entity_stats_get(entity->stats);
 
 	drm_sched_fence_init(job->s_fence, job->entity);
 }
@@ -849,6 +850,7 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
 		 * been called.
 		 */
 		dma_fence_put(&job->s_fence->finished);
+		drm_sched_entity_stats_put(job->entity_stats);
 	} else {
 		/* The job was aborted before it has been committed to be run;
 		 * notably, drm_sched_job_arm() has not been called.
@@ -1000,8 +1002,10 @@ static void drm_sched_free_job_work(struct work_struct *w)
 		container_of(w, struct drm_gpu_scheduler, work_free_job);
 	struct drm_sched_job *job;
 
-	while ((job = drm_sched_get_finished_job(sched)))
+	while ((job = drm_sched_get_finished_job(sched))) {
+		drm_sched_entity_stats_job_add_gpu_time(job);
 		sched->ops->free_job(job);
+	}
 
 	drm_sched_run_job_queue(sched);
 }
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 2b3eaf623f68..bf31c4c58a4c 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -71,6 +71,8 @@ enum drm_sched_priority {
 	DRM_SCHED_PRIORITY_COUNT
 };
 
+struct drm_sched_entity_stats;
+
 /**
  * struct drm_sched_entity - A wrapper around a job queue (typically
  * attached to the DRM file_priv).
@@ -110,6 +112,11 @@ struct drm_sched_entity {
 	 */
 	struct drm_sched_rq		*rq;
 
+	/**
+	 * @stats: Stats object reference held by the entity and jobs.
+	 */
+	struct drm_sched_entity_stats	*stats;
+
 	/**
 	 * @sched_list:
 	 *
@@ -365,6 +372,11 @@ struct drm_sched_job {
 	struct drm_sched_fence		*s_fence;
 	struct drm_sched_entity		*entity;
 
+	/**
+	 * @entity_stats: Stats object reference held by the job and entity.
+	 */
+	struct drm_sched_entity_stats	*entity_stats;
+
 	enum drm_sched_priority		s_priority;
 	u32				credits;
 	/** @last_dependency: tracks @dependencies as they signal */
-- 
2.52.0
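[Editor's note, not part of the patch] As a minimal sketch of how a fair scheduling policy could later consume the accumulated runtime: the helper drm_sched_entity_stats_runtime() and the walk over a hypothetical list of runnable entities (linked via entity->list) are illustrative assumptions only, not code introduced by this patch or the series.

/*
 * Illustrative sketch only. Assumes the drm_sched_entity_stats object added
 * by the patch above; the helper name and the entity list are hypothetical.
 */
static inline ktime_t
drm_sched_entity_stats_runtime(struct drm_sched_entity_stats *stats)
{
	ktime_t runtime;

	spin_lock(&stats->lock);
	runtime = stats->runtime;
	spin_unlock(&stats->lock);

	return runtime;
}

/* Pick the entity which has so far consumed the least GPU time. */
static struct drm_sched_entity *
example_pick_entity(struct list_head *entities)
{
	struct drm_sched_entity *entity, *pick = NULL;
	ktime_t min_runtime = KTIME_MAX;

	list_for_each_entry(entity, entities, list) {
		ktime_t runtime = drm_sched_entity_stats_runtime(entity->stats);

		if (ktime_before(runtime, min_runtime)) {
			min_runtime = runtime;
			pick = entity;
		}
	}

	return pick;
}

Reading through entity->stats in such a policy is safe because the entity holds its own reference for its whole lifetime, while each armed job holds a separate reference until drm_sched_job_cleanup(); that is precisely the lifetime decoupling the stats object is introduced to handle.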
