[Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines
We need to move to a more flexible timeline that doesn't assume one fence context per engine, and so allow for a single timeline to be used across a combination of engines. This means that preallocating a fence context per engine is now a hindrance, and so we want to introduce the singular timeline. From the code perspective, this has the notable advantage of clearing up a lot of murky semantics and some clumsy pointer chasing. By splitting the timeline up into a single entity rather than an array of per-engine timelines, we can realise the goal of the previous patch of tracking the timeline alongside the ring. v2: Tweak wait_for_idle to stop the compiler from thinking that ret may be uninitialised. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 129 +--- drivers/gpu/drm/i915/i915_gem_context.c | 49 ++--- drivers/gpu/drm/i915/i915_gem_context.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- drivers/gpu/drm/i915/i915_gem_timeline.c | 198 -- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 68 +++--- drivers/gpu/drm/i915/i915_request.h | 3 +- drivers/gpu/drm/i915/i915_timeline.c | 105 ++ .../{i915_gem_timeline.h => i915_timeline.h} | 67 +++--- drivers/gpu/drm/i915/intel_engine_cs.c| 27 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 48 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +- .../{i915_gem_timeline.c => i915_timeline.c} | 94 +++-- drivers/gpu/drm/i915/selftests/mock_engine.c | 32 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 10 +- .../gpu/drm/i915/selftests/mock_timeline.c| 45 ++-- .../gpu/drm/i915/selftests/mock_timeline.h| 28 +-- 23 files changed, 398 insertions(+), 570 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c create mode 100644 
drivers/gpu/drm/i915/i915_timeline.c rename drivers/gpu/drm/i915/{i915_gem_timeline.h => i915_timeline.h} (68%) rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => i915_timeline.c} (70%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bee52a949a9..120db21fcd50 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b9bd8328f501..dab15b6abc3c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,10 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -2058,8 +2058,6 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gem_timeline execution_timeline; - struct i915_gem_timeline legacy_timeline; struct list_head timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d60f3bd4bc66..44cf67f713c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); @@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is 
pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) contin
Re: [Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines
Quoting Tvrtko Ursulin (2018-04-23 13:33:04) > > On 23/04/2018 11:13, Chris Wilson wrote: > > We need to move to a more flexible timeline that doesn't assume one > > fence context per engine, and so allow for a single timeline to be used > > across a combination of engines. This means that preallocating a fence > > context per engine is now a hindrance, and so we want to introduce the > > singular timeline. From the code perspective, this has the notable > > advantage of clearing up a lot of mirky semantics and some clumsy > > pointer chasing. > > > > By splitting the timeline up into a single entity rather than an array > > of per-engine timelines, we can realise the goal of the previous patch > > of tracking the timeline alongside the ring. > > Isn't single fence context and a single seqno space breaking the ABI? > Submissions from a context are now serialized across all engines. I am > thinking about await and dependency created in __i915_add_request to > timeline->last_request. It's still one per engine, the ABI shouldn't have changed unless I've screwed up. I know I wrote some tests to assert the independence... But probably only have those for the new ABI we were discussing. I guess I should make sure gem_exec_schedule is covering it. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines
On 23/04/2018 11:13, Chris Wilson wrote: We need to move to a more flexible timeline that doesn't assume one fence context per engine, and so allow for a single timeline to be used across a combination of engines. This means that preallocating a fence context per engine is now a hindrance, and so we want to introduce the singular timeline. From the code perspective, this has the notable advantage of clearing up a lot of mirky semantics and some clumsy pointer chasing. By splitting the timeline up into a single entity rather than an array of per-engine timelines, we can realise the goal of the previous patch of tracking the timeline alongside the ring. Isn't single fence context and a single seqno space breaking the ABI? Submissions from a context are now serialized across all engines. I am thinking about await and dependency created in __i915_add_request to timeline->last_request. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 117 +-- drivers/gpu/drm/i915/i915_gem_context.c | 49 ++--- drivers/gpu/drm/i915/i915_gem_context.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- drivers/gpu/drm/i915/i915_gem_timeline.c | 198 -- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 65 +++--- drivers/gpu/drm/i915/i915_request.h | 3 +- drivers/gpu/drm/i915/i915_timeline.c | 105 ++ .../{i915_gem_timeline.h => i915_timeline.h} | 67 +++--- drivers/gpu/drm/i915/intel_engine_cs.c| 27 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 48 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +- .../{i915_gem_timeline.c => i915_timeline.c} | 94 +++-- drivers/gpu/drm/i915/selftests/mock_engine.c | 32 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 11 +- .../gpu/drm/i915/selftests/mock_timeline.c| 45 ++-- 
.../gpu/drm/i915/selftests/mock_timeline.h| 28 +-- 23 files changed, 389 insertions(+), 563 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c create mode 100644 drivers/gpu/drm/i915/i915_timeline.c rename drivers/gpu/drm/i915/{i915_gem_timeline.h => i915_timeline.h} (68%) rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => i915_timeline.c} (70%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bee52a949a9..120db21fcd50 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66123cf0eda3..89cb74c30a00 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,10 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -2058,8 +2058,6 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gem_timeline execution_timeline; - struct i915_gem_timeline legacy_timeline; struct list_head timelines; struct list_head live_rings; u32 active_requests; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1635975dbc16..f07556693cfe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - 
i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); @@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->reques
[Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines
We need to move to a more flexible timeline that doesn't assume one fence context per engine, and so allow for a single timeline to be used across a combination of engines. This means that preallocating a fence context per engine is now a hindrance, and so we want to introduce the singular timeline. From the code perspective, this has the notable advantage of clearing up a lot of mirky semantics and some clumsy pointer chasing. By splitting the timeline up into a single entity rather than an array of per-engine timelines, we can realise the goal of the previous patch of tracking the timeline alongside the ring. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 117 +-- drivers/gpu/drm/i915/i915_gem_context.c | 49 ++--- drivers/gpu/drm/i915/i915_gem_context.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- drivers/gpu/drm/i915/i915_gem_timeline.c | 198 -- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 65 +++--- drivers/gpu/drm/i915/i915_request.h | 3 +- drivers/gpu/drm/i915/i915_timeline.c | 105 ++ .../{i915_gem_timeline.h => i915_timeline.h} | 67 +++--- drivers/gpu/drm/i915/intel_engine_cs.c| 27 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 48 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +- .../{i915_gem_timeline.c => i915_timeline.c} | 94 +++-- drivers/gpu/drm/i915/selftests/mock_engine.c | 32 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 11 +- .../gpu/drm/i915/selftests/mock_timeline.c| 45 ++-- .../gpu/drm/i915/selftests/mock_timeline.h| 28 +-- 23 files changed, 389 insertions(+), 563 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c create mode 100644 drivers/gpu/drm/i915/i915_timeline.c rename drivers/gpu/drm/i915/{i915_gem_timeline.h => 
i915_timeline.h} (68%) rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => i915_timeline.c} (70%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bee52a949a9..120db21fcd50 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66123cf0eda3..89cb74c30a00 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,10 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -2058,8 +2058,6 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gem_timeline execution_timeline; - struct i915_gem_timeline legacy_timeline; struct list_head timelines; struct list_head live_rings; u32 active_requests; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1635975dbc16..f07556693cfe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); @@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. 
Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; @@ -2989,7 +2989,7 @@ i915_gem_find_active_request