[Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines

2018-04-24 Thread Chris Wilson
We need to move to a more flexible timeline that doesn't assume one
fence context per engine, and so allow for a single timeline to be used
across a combination of engines. This means that preallocating a fence
context per engine is now a hindrance, and so we want to introduce the
singular timeline. From the code perspective, this has the notable
advantage of clearing up a lot of mirky semantics and some clumsy
pointer chasing.

By splitting the timeline up into a single entity rather than an array
of per-engine timelines, we can realise the goal of the previous patch
of tracking the timeline alongside the ring.

v2: Tweak wait_for_idle to stop the compiling thinking that ret may be
uninitialised.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Makefile |   2 +-
 drivers/gpu/drm/i915/i915_drv.h   |   4 +-
 drivers/gpu/drm/i915/i915_gem.c   | 129 +---
 drivers/gpu/drm/i915/i915_gem_context.c   |  49 ++---
 drivers/gpu/drm/i915/i915_gem_context.h   |   2 -
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   3 +-
 drivers/gpu/drm/i915/i915_gem_timeline.c  | 198 --
 drivers/gpu/drm/i915/i915_gpu_error.c |   4 +-
 drivers/gpu/drm/i915/i915_perf.c  |  10 +-
 drivers/gpu/drm/i915/i915_request.c   |  68 +++---
 drivers/gpu/drm/i915/i915_request.h   |   3 +-
 drivers/gpu/drm/i915/i915_timeline.c  | 105 ++
 .../{i915_gem_timeline.h => i915_timeline.h}  |  67 +++---
 drivers/gpu/drm/i915/intel_engine_cs.c|  27 ++-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c  |  48 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c   |  25 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h   |  11 +-
 .../{i915_gem_timeline.c => i915_timeline.c}  |  94 +++--
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  32 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
 .../gpu/drm/i915/selftests/mock_timeline.c|  45 ++--
 .../gpu/drm/i915/selftests/mock_timeline.h|  28 +--
 23 files changed, 398 insertions(+), 570 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c
 create mode 100644 drivers/gpu/drm/i915/i915_timeline.c
 rename drivers/gpu/drm/i915/{i915_gem_timeline.h => i915_timeline.h} (68%)
 rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => i915_timeline.c} 
(70%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9bee52a949a9..120db21fcd50 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
- i915_gem_timeline.o \
  i915_gem_userptr.o \
  i915_gemfs.o \
  i915_query.o \
  i915_request.o \
+ i915_timeline.o \
  i915_trace_points.o \
  i915_vma.o \
  intel_breadcrumbs.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b9bd8328f501..dab15b6abc3c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -72,10 +72,10 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
-#include "i915_gem_timeline.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
+#include "i915_timeline.h"
 #include "i915_vma.h"
 
 #include "intel_gvt.h"
@@ -2058,8 +2058,6 @@ struct drm_i915_private {
void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
 
-   struct i915_gem_timeline execution_timeline;
-   struct i915_gem_timeline legacy_timeline;
struct list_head timelines;
 
struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d60f3bd4bc66..44cf67f713c7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
synchronize_irq(i915->drm.irq);
 
intel_engines_park(i915);
-   i915_gem_timelines_park(i915);
+   i915_timelines_park(i915);
 
i915_pmu_gt_parked(i915);
 
@@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs 
*engine)
 * extra delay for a recent interrupt is pointless. Hence, we do
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
-   spin_lock_irqsave(>timeline->lock, flags);
-   list_for_each_entry(request, >timeline->requests, link) {
+   spin_lock_irqsave(>timeline.lock, flags);
+   list_for_each_entry(request, >timeline.requests, link) {
if (__i915_request_completed(request, request->global_seqno))
  

Re: [Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines

2018-04-23 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-04-23 13:33:04)
> 
> On 23/04/2018 11:13, Chris Wilson wrote:
> > We need to move to a more flexible timeline that doesn't assume one
> > fence context per engine, and so allow for a single timeline to be used
> > across a combination of engines. This means that preallocating a fence
> > context per engine is now a hindrance, and so we want to introduce the
> > singular timeline. From the code perspective, this has the notable
> > advantage of clearing up a lot of mirky semantics and some clumsy
> > pointer chasing.
> > 
> > By splitting the timeline up into a single entity rather than an array
> > of per-engine timelines, we can realise the goal of the previous patch
> > of tracking the timeline alongside the ring.
> 
> Isn't single fence context and a single seqno space breaking the ABI? 
> Submissions from a context are now serialized across all engines. I am 
> thinking about await and dependency created in __i915_add_request to 
> timeline->last_request.

It's still one per engine, the ABI shouldn't have changed unless I've
screwed up. I now I wrote some tests to assert the independence... But
probably only have those for the new ABI we were discussing.

I guess I should make sure gem_exec_schedule is covering it.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines

2018-04-23 Thread Tvrtko Ursulin


On 23/04/2018 11:13, Chris Wilson wrote:

We need to move to a more flexible timeline that doesn't assume one
fence context per engine, and so allow for a single timeline to be used
across a combination of engines. This means that preallocating a fence
context per engine is now a hindrance, and so we want to introduce the
singular timeline. From the code perspective, this has the notable
advantage of clearing up a lot of mirky semantics and some clumsy
pointer chasing.

By splitting the timeline up into a single entity rather than an array
of per-engine timelines, we can realise the goal of the previous patch
of tracking the timeline alongside the ring.


Isn't single fence context and a single seqno space breaking the ABI? 
Submissions from a context are now serialized across all engines. I am 
thinking about await and dependency created in __i915_add_request to 
timeline->last_request.




Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/Makefile |   2 +-
  drivers/gpu/drm/i915/i915_drv.h   |   4 +-
  drivers/gpu/drm/i915/i915_gem.c   | 117 +--
  drivers/gpu/drm/i915/i915_gem_context.c   |  49 ++---
  drivers/gpu/drm/i915/i915_gem_context.h   |   2 -
  drivers/gpu/drm/i915/i915_gem_gtt.h   |   3 +-
  drivers/gpu/drm/i915/i915_gem_timeline.c  | 198 --
  drivers/gpu/drm/i915/i915_gpu_error.c |   4 +-
  drivers/gpu/drm/i915/i915_perf.c  |  10 +-
  drivers/gpu/drm/i915/i915_request.c   |  65 +++---
  drivers/gpu/drm/i915/i915_request.h   |   3 +-
  drivers/gpu/drm/i915/i915_timeline.c  | 105 ++
  .../{i915_gem_timeline.h => i915_timeline.h}  |  67 +++---
  drivers/gpu/drm/i915/intel_engine_cs.c|  27 ++-
  drivers/gpu/drm/i915/intel_guc_submission.c   |   4 +-
  drivers/gpu/drm/i915/intel_lrc.c  |  48 +++--
  drivers/gpu/drm/i915/intel_ringbuffer.c   |  23 +-
  drivers/gpu/drm/i915/intel_ringbuffer.h   |  11 +-
  .../{i915_gem_timeline.c => i915_timeline.c}  |  94 +++--
  drivers/gpu/drm/i915/selftests/mock_engine.c  |  32 ++-
  .../gpu/drm/i915/selftests/mock_gem_device.c  |  11 +-
  .../gpu/drm/i915/selftests/mock_timeline.c|  45 ++--
  .../gpu/drm/i915/selftests/mock_timeline.h|  28 +--
  23 files changed, 389 insertions(+), 563 deletions(-)
  delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c
  create mode 100644 drivers/gpu/drm/i915/i915_timeline.c
  rename drivers/gpu/drm/i915/{i915_gem_timeline.h => i915_timeline.h} (68%)
  rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => 
i915_timeline.c} (70%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9bee52a949a9..120db21fcd50 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
- i915_gem_timeline.o \
  i915_gem_userptr.o \
  i915_gemfs.o \
  i915_query.o \
  i915_request.o \
+ i915_timeline.o \
  i915_trace_points.o \
  i915_vma.o \
  intel_breadcrumbs.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66123cf0eda3..89cb74c30a00 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -72,10 +72,10 @@
  #include "i915_gem_fence_reg.h"
  #include "i915_gem_object.h"
  #include "i915_gem_gtt.h"
-#include "i915_gem_timeline.h"
  #include "i915_gpu_error.h"
  #include "i915_request.h"
  #include "i915_scheduler.h"
+#include "i915_timeline.h"
  #include "i915_vma.h"
  
  #include "intel_gvt.h"

@@ -2058,8 +2058,6 @@ struct drm_i915_private {
void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
  
-		struct i915_gem_timeline execution_timeline;

-   struct i915_gem_timeline legacy_timeline;
struct list_head timelines;
struct list_head live_rings;
u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1635975dbc16..f07556693cfe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
synchronize_irq(i915->drm.irq);
  
  	intel_engines_park(i915);

-   i915_gem_timelines_park(i915);
+   i915_timelines_park(i915);
  
  	i915_pmu_gt_parked(i915);
  
@@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)

 * extra delay for a recent interrupt is pointless. Hence, we do
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
-   spin_lock_irqsave(>timeline->lock, flags);
-   

[Intel-gfx] [PATCH 5/6] drm/i915: Split i915_gem_timeline into individual timelines

2018-04-23 Thread Chris Wilson
We need to move to a more flexible timeline that doesn't assume one
fence context per engine, and so allow for a single timeline to be used
across a combination of engines. This means that preallocating a fence
context per engine is now a hindrance, and so we want to introduce the
singular timeline. From the code perspective, this has the notable
advantage of clearing up a lot of mirky semantics and some clumsy
pointer chasing.

By splitting the timeline up into a single entity rather than an array
of per-engine timelines, we can realise the goal of the previous patch
of tracking the timeline alongside the ring.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Makefile |   2 +-
 drivers/gpu/drm/i915/i915_drv.h   |   4 +-
 drivers/gpu/drm/i915/i915_gem.c   | 117 +--
 drivers/gpu/drm/i915/i915_gem_context.c   |  49 ++---
 drivers/gpu/drm/i915/i915_gem_context.h   |   2 -
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   3 +-
 drivers/gpu/drm/i915/i915_gem_timeline.c  | 198 --
 drivers/gpu/drm/i915/i915_gpu_error.c |   4 +-
 drivers/gpu/drm/i915/i915_perf.c  |  10 +-
 drivers/gpu/drm/i915/i915_request.c   |  65 +++---
 drivers/gpu/drm/i915/i915_request.h   |   3 +-
 drivers/gpu/drm/i915/i915_timeline.c  | 105 ++
 .../{i915_gem_timeline.h => i915_timeline.h}  |  67 +++---
 drivers/gpu/drm/i915/intel_engine_cs.c|  27 ++-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c  |  48 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c   |  23 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h   |  11 +-
 .../{i915_gem_timeline.c => i915_timeline.c}  |  94 +++--
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  32 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  11 +-
 .../gpu/drm/i915/selftests/mock_timeline.c|  45 ++--
 .../gpu/drm/i915/selftests/mock_timeline.h|  28 +--
 23 files changed, 389 insertions(+), 563 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c
 create mode 100644 drivers/gpu/drm/i915/i915_timeline.c
 rename drivers/gpu/drm/i915/{i915_gem_timeline.h => i915_timeline.h} (68%)
 rename drivers/gpu/drm/i915/selftests/{i915_gem_timeline.c => i915_timeline.c} 
(70%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9bee52a949a9..120db21fcd50 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -67,11 +67,11 @@ i915-y += i915_cmd_parser.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
- i915_gem_timeline.o \
  i915_gem_userptr.o \
  i915_gemfs.o \
  i915_query.o \
  i915_request.o \
+ i915_timeline.o \
  i915_trace_points.o \
  i915_vma.o \
  intel_breadcrumbs.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66123cf0eda3..89cb74c30a00 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -72,10 +72,10 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
-#include "i915_gem_timeline.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
+#include "i915_timeline.h"
 #include "i915_vma.h"
 
 #include "intel_gvt.h"
@@ -2058,8 +2058,6 @@ struct drm_i915_private {
void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
 
-   struct i915_gem_timeline execution_timeline;
-   struct i915_gem_timeline legacy_timeline;
struct list_head timelines;
struct list_head live_rings;
u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1635975dbc16..f07556693cfe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
synchronize_irq(i915->drm.irq);
 
intel_engines_park(i915);
-   i915_gem_timelines_park(i915);
+   i915_timelines_park(i915);
 
i915_pmu_gt_parked(i915);
 
@@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs 
*engine)
 * extra delay for a recent interrupt is pointless. Hence, we do
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
-   spin_lock_irqsave(>timeline->lock, flags);
-   list_for_each_entry(request, >timeline->requests, link) {
+   spin_lock_irqsave(>timeline.lock, flags);
+   list_for_each_entry(request, >timeline.requests, link) {
if (__i915_request_completed(request, request->global_seqno))
continue;
 
@@ -2989,7 +2989,7 @@