Re: [Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-30 Thread Ben Widawsky
On Thu, 29 Mar 2012 20:49:00 +0200
Daniel Vetter dan...@ffwll.ch wrote:

 On Thu, Mar 29, 2012 at 11:43:12AM -0700, Ben Widawsky wrote:
  On Thu, 29 Mar 2012 20:24:11 +0200
  Daniel Vetter dan...@ffwll.ch wrote:
  
   On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
Implement the context switch code as well as the interfaces to do the
context switch. This patch also doesn't match 1:1 with the RFC patches.
The main difference is that from Daniel's responses the last context
object is now stored instead of the last context. This aids in allows us
to free the context data structure, and context object independently.

There is room for optimization: this code will pin the context object
until the next context is active. The optimal way to do it is to
actually pin the object, move it to the active list, do the context
switch, and then unpin it. This allows the eviction code to actually
evict the context object if needed.

The context switch code is missing workarounds, they will be implemented
in future patches.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
   
   Ok, I've looked at the use-sites of context_get and all this refcounting
   and noticed that:
   - we always hold dev-struct_mutex
   - we always drop the acquired reference to the context structure in the
 same function without dropping struct_mutex in between.
   
   So we don't seem to require any reference counting on these (and
   additional locking on the idr). Additionally the idr locking seems to give
   us a false sense of security because afaics the locking/refcounting would
   be broken when we would _not_ hold struct_mutex.
   
   So can we just rip this out or do we need this (in which case it needs
   some more work imo)?
   -Daniel
  
  I think it can be ripped out. I was on the fence about this before
  submitting the patches and left it out of laziness; it doesn't hurt as
  there is no lock contention assuming your statement is true with no
  bugs in the code, and it follows the canonical use of idrs.
  
  Let me look it over some more to make sure after you finish reviewing
  the other stuff. The idea was supposed to be contexts can be created
  and looked up without struct mutex, but that isn't actually done in the
  code.
 
 Well, if you want to leave it I have to add some more review comments
 about it - atm I think it would be buggy and racy without holding
 struct_mutex ...
 -Daniel

As I said before, I'll either find a good use for it, or remove it.
Context creation is the only currently viable case for it- but probably
not worth the extra lock.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-29 Thread Daniel Vetter
On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
 Implement the context switch code as well as the interfaces to do the
 context switch. This patch also doesn't match 1:1 with the RFC patches.
 The main difference is that from Daniel's responses the last context
 object is now stored instead of the last context. This aids in allows us
 to free the context data structure, and context object independently.
 
 There is room for optimization: this code will pin the context object
 until the next context is active. The optimal way to do it is to
 actually pin the object, move it to the active list, do the context
 switch, and then unpin it. This allows the eviction code to actually
 evict the context object if needed.
 
 The context switch code is missing workarounds, they will be implemented
 in future patches.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

Ok, I've looked at the use-sites of context_get and all this refcounting
and noticed that:
- we always hold dev-struct_mutex
- we always drop the acquired reference to the context structure in the
  same function without dropping struct_mutex in between.

So we don't seem to require any reference counting on these (and
additional locking on the idr). Additionally the idr locking seems to give
us a false sense of security because afaics the locking/refcounting would
be broken when we would _not_ hold struct_mutex.

So can we just rip this out or do we need this (in which case it needs
some more work imo)?
-Daniel

 ---
  drivers/gpu/drm/i915/i915_drv.h |5 ++
  drivers/gpu/drm/i915/i915_gem_context.c |  118 
 ++-
  drivers/gpu/drm/i915/intel_ringbuffer.c |   26 +++
  drivers/gpu/drm/i915/intel_ringbuffer.h |5 ++
  4 files changed, 153 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index f458a8f..c6c2ada 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -1303,10 +1303,15 @@ int i915_gem_object_set_cache_level(struct 
 drm_i915_gem_object *obj,
   enum i915_cache_level cache_level);
  
  /* i915_gem_context.c */
 +#define I915_CONTEXT_FORCED_SWITCH (10)
 +#define I915_CONTEXT_INITIAL_SWITCH (11)
  void i915_gem_context_load(struct drm_device *dev);
  void i915_gem_context_unload(struct drm_device *dev);
  void i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 +int i915_switch_context(struct intel_ring_buffer *ring,
 + struct drm_file *file,
 + int to_id, u32 seqno, u32 flags);
  
  /* i915_gem_gtt.c */
  int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
 b/drivers/gpu/drm/i915/i915_gem_context.c
 index 321bafd..cc508d5 100644
 --- a/drivers/gpu/drm/i915/i915_gem_context.c
 +++ b/drivers/gpu/drm/i915/i915_gem_context.c
 @@ -262,7 +262,7 @@ void i915_gem_context_close(struct drm_device *dev, 
 struct drm_file *file)
   mutex_unlock(dev-struct_mutex);
  }
  
 -static __used struct i915_hw_context *
 +static struct i915_hw_context *
  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
  {
   struct i915_hw_context *ctx = NULL;
 @@ -279,3 +279,119 @@ i915_gem_context_get(struct drm_i915_file_private 
 *file_priv, u32 id)
  
   return ctx;
  }
 +
 +static int do_switch(struct drm_i915_gem_object *from_obj,
 +  struct i915_hw_context *to,
 +  u32 seqno, u32 flags)
 +{
 + bool initial_switch = (flags  I915_CONTEXT_INITIAL_SWITCH) ? true : 
 false;
 + bool force =  (flags  I915_CONTEXT_FORCED_SWITCH) ? true : false;
 + struct intel_ring_buffer *ring = NULL;
 + u32 hw_flags = 0;
 + int ret;
 +
 + BUG_ON(to == NULL);
 + BUG_ON(from_obj != NULL  from_obj-pin_count == 0);
 + BUG_ON((from_obj != NULL  from_obj-context_id  0) || 
 to-obj-context_id  0);
 +
 + ret = i915_gem_object_pin(to-obj, CONTEXT_ALIGN, false);
 + if (ret)
 + return ret;
 +
 + if (initial_switch)
 + hw_flags |= MI_RESTORE_INHIBIT;
 + if (force)
 + hw_flags |= MI_FORCE_RESTORE;
 +
 + ring = to-ring;
 + ret = intel_ring_mi_set_context(ring, to, hw_flags);
 + if (ret) {
 + i915_gem_object_unpin(to-obj);
 + return ret;
 + }
 +
 + /* The backing object for the context is done after switching to the
 +  * *next* context. Therefore we cannot retire the previous context until
 +  * the next context has already started running. In fact, the below code
 +  * is a bit suboptimal because the retiring can occur simply after the
 +  * MI_SET_CONTEXT instead of when the next seqno has completed.
 +  */
 + if (from_obj != NULL) {
 + i915_gem_object_move_to_active(from_obj, ring, seqno);
 +  

Re: [Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-29 Thread Ben Widawsky
On Thu, 29 Mar 2012 20:24:11 +0200
Daniel Vetter dan...@ffwll.ch wrote:

 On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
  Implement the context switch code as well as the interfaces to do the
  context switch. This patch also doesn't match 1:1 with the RFC patches.
  The main difference is that from Daniel's responses the last context
  object is now stored instead of the last context. This aids in allows us
  to free the context data structure, and context object independently.
  
  There is room for optimization: this code will pin the context object
  until the next context is active. The optimal way to do it is to
  actually pin the object, move it to the active list, do the context
  switch, and then unpin it. This allows the eviction code to actually
  evict the context object if needed.
  
  The context switch code is missing workarounds, they will be implemented
  in future patches.
  
  Signed-off-by: Ben Widawsky b...@bwidawsk.net
 
 Ok, I've looked at the use-sites of context_get and all this refcounting
 and noticed that:
 - we always hold dev-struct_mutex
 - we always drop the acquired reference to the context structure in the
   same function without dropping struct_mutex in between.
 
 So we don't seem to require any reference counting on these (and
 additional locking on the idr). Additionally the idr locking seems to give
 us a false sense of security because afaics the locking/refcounting would
 be broken when we would _not_ hold struct_mutex.
 
 So can we just rip this out or do we need this (in which case it needs
 some more work imo)?
 -Daniel

I think it can be ripped out. I was on the fence about this before
submitting the patches and left it out of laziness; it doesn't hurt as
there is no lock contention assuming your statement is true with no
bugs in the code, and it follows the canonical use of idrs.

Let me look it over some more to make sure after you finish reviewing
the other stuff. The idea was supposed to be contexts can be created
and looked up without struct mutex, but that isn't actually done in the
code.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-29 Thread Daniel Vetter
On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
 Implement the context switch code as well as the interfaces to do the
 context switch. This patch also doesn't match 1:1 with the RFC patches.
 The main difference is that from Daniel's responses the last context
 object is now stored instead of the last context. This aids in allows us
 to free the context data structure, and context object independently.
 
 There is room for optimization: this code will pin the context object
 until the next context is active. The optimal way to do it is to
 actually pin the object, move it to the active list, do the context
 switch, and then unpin it. This allows the eviction code to actually
 evict the context object if needed.
 
 The context switch code is missing workarounds, they will be implemented
 in future patches.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

Meh, I've forgotten half of the review, some more comments below.

 ---
  drivers/gpu/drm/i915/i915_drv.h |5 ++
  drivers/gpu/drm/i915/i915_gem_context.c |  118 
 ++-
  drivers/gpu/drm/i915/intel_ringbuffer.c |   26 +++
  drivers/gpu/drm/i915/intel_ringbuffer.h |5 ++
  4 files changed, 153 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index f458a8f..c6c2ada 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -1303,10 +1303,15 @@ int i915_gem_object_set_cache_level(struct 
 drm_i915_gem_object *obj,
   enum i915_cache_level cache_level);
  
  /* i915_gem_context.c */
 +#define I915_CONTEXT_FORCED_SWITCH (10)
 +#define I915_CONTEXT_INITIAL_SWITCH (11)
  void i915_gem_context_load(struct drm_device *dev);
  void i915_gem_context_unload(struct drm_device *dev);
  void i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 +int i915_switch_context(struct intel_ring_buffer *ring,
 + struct drm_file *file,
 + int to_id, u32 seqno, u32 flags);
  
  /* i915_gem_gtt.c */
  int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
 b/drivers/gpu/drm/i915/i915_gem_context.c
 index 321bafd..cc508d5 100644
 --- a/drivers/gpu/drm/i915/i915_gem_context.c
 +++ b/drivers/gpu/drm/i915/i915_gem_context.c
 @@ -262,7 +262,7 @@ void i915_gem_context_close(struct drm_device *dev, 
 struct drm_file *file)
   mutex_unlock(dev-struct_mutex);
  }
  
 -static __used struct i915_hw_context *
 +static struct i915_hw_context *
  i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
  {
   struct i915_hw_context *ctx = NULL;
 @@ -279,3 +279,119 @@ i915_gem_context_get(struct drm_i915_file_private 
 *file_priv, u32 id)
  
   return ctx;
  }
 +
 +static int do_switch(struct drm_i915_gem_object *from_obj,
 +  struct i915_hw_context *to,
 +  u32 seqno, u32 flags)
 +{
 + bool initial_switch = (flags  I915_CONTEXT_INITIAL_SWITCH) ? true : 
 false;
 + bool force =  (flags  I915_CONTEXT_FORCED_SWITCH) ? true : false;

I don't like this flag passing. force seems to be totally unused. And imo
inital_switch would work much better if we track this in
i915_hw_context-intialized (which will be false on context create) and
check this when before doing the context switch.

That way you'd also lose the complexity of having to initialize a context
explicitly after create - it would just magically happen.

 + struct intel_ring_buffer *ring = NULL;
 + u32 hw_flags = 0;
 + int ret;
 +
 + BUG_ON(to == NULL);
 + BUG_ON(from_obj != NULL  from_obj-pin_count == 0);
 + BUG_ON((from_obj != NULL  from_obj-context_id  0) || 
 to-obj-context_id  0);
 +
 + ret = i915_gem_object_pin(to-obj, CONTEXT_ALIGN, false);
 + if (ret)
 + return ret;
 +
 + if (initial_switch)
 + hw_flags |= MI_RESTORE_INHIBIT;
 + if (force)
 + hw_flags |= MI_FORCE_RESTORE;

Please move this into the function that actually emits the MI_ command -
it's imo hard to read things if it's distributed all over the place. And
because we don't seem to need force, a simple (and destriptive) bool
inhibit_restore would be sufficient.

 +
 + ring = to-ring;
 + ret = intel_ring_mi_set_context(ring, to, hw_flags);
 + if (ret) {
 + i915_gem_object_unpin(to-obj);
 + return ret;
 + }
 +
 + /* The backing object for the context is done after switching to the
 +  * *next* context. Therefore we cannot retire the previous context until
 +  * the next context has already started running. In fact, the below code
 +  * is a bit suboptimal because the retiring can occur simply after the
 +  * MI_SET_CONTEXT instead of when the next seqno has completed.
 +  */
 + if 

Re: [Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-29 Thread Daniel Vetter
On Thu, Mar 29, 2012 at 11:43:12AM -0700, Ben Widawsky wrote:
 On Thu, 29 Mar 2012 20:24:11 +0200
 Daniel Vetter dan...@ffwll.ch wrote:
 
  On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote:
   Implement the context switch code as well as the interfaces to do the
   context switch. This patch also doesn't match 1:1 with the RFC patches.
   The main difference is that from Daniel's responses the last context
   object is now stored instead of the last context. This aids in allows us
   to free the context data structure, and context object independently.
   
   There is room for optimization: this code will pin the context object
   until the next context is active. The optimal way to do it is to
   actually pin the object, move it to the active list, do the context
   switch, and then unpin it. This allows the eviction code to actually
   evict the context object if needed.
   
   The context switch code is missing workarounds, they will be implemented
   in future patches.
   
   Signed-off-by: Ben Widawsky b...@bwidawsk.net
  
  Ok, I've looked at the use-sites of context_get and all this refcounting
  and noticed that:
  - we always hold dev-struct_mutex
  - we always drop the acquired reference to the context structure in the
same function without dropping struct_mutex in between.
  
  So we don't seem to require any reference counting on these (and
  additional locking on the idr). Additionally the idr locking seems to give
  us a false sense of security because afaics the locking/refcounting would
  be broken when we would _not_ hold struct_mutex.
  
  So can we just rip this out or do we need this (in which case it needs
  some more work imo)?
  -Daniel
 
 I think it can be ripped out. I was on the fence about this before
 submitting the patches and left it out of laziness; it doesn't hurt as
 there is no lock contention assuming your statement is true with no
 bugs in the code, and it follows the canonical use of idrs.
 
 Let me look it over some more to make sure after you finish reviewing
 the other stuff. The idea was supposed to be contexts can be created
 and looked up without struct mutex, but that isn't actually done in the
 code.

Well, if you want to leave it I have to add some more review comments
about it - atm I think it would be buggy and racy without holding
struct_mutex ...
-Daniel
-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/18] drm/i915: context switch implementation

2012-03-18 Thread Ben Widawsky
Implement the context switch code as well as the interfaces to do the
context switch. This patch also doesn't match 1:1 with the RFC patches.
The main difference is that from Daniel's responses the last context
object is now stored instead of the last context. This aids in allows us
to free the context data structure, and context object independently.

There is room for optimization: this code will pin the context object
until the next context is active. The optimal way to do it is to
actually pin the object, move it to the active list, do the context
switch, and then unpin it. This allows the eviction code to actually
evict the context object if needed.

The context switch code is missing workarounds, they will be implemented
in future patches.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_drv.h |5 ++
 drivers/gpu/drm/i915/i915_gem_context.c |  118 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c |   26 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |5 ++
 4 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f458a8f..c6c2ada 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1303,10 +1303,15 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
enum i915_cache_level cache_level);
 
 /* i915_gem_context.c */
+#define I915_CONTEXT_FORCED_SWITCH (10)
+#define I915_CONTEXT_INITIAL_SWITCH (11)
 void i915_gem_context_load(struct drm_device *dev);
 void i915_gem_context_unload(struct drm_device *dev);
 void i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
+int i915_switch_context(struct intel_ring_buffer *ring,
+   struct drm_file *file,
+   int to_id, u32 seqno, u32 flags);
 
 /* i915_gem_gtt.c */
 int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 321bafd..cc508d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -262,7 +262,7 @@ void i915_gem_context_close(struct drm_device *dev, struct 
drm_file *file)
mutex_unlock(dev-struct_mutex);
 }
 
-static __used struct i915_hw_context *
+static struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 {
struct i915_hw_context *ctx = NULL;
@@ -279,3 +279,119 @@ i915_gem_context_get(struct drm_i915_file_private 
*file_priv, u32 id)
 
return ctx;
 }
+
+static int do_switch(struct drm_i915_gem_object *from_obj,
+struct i915_hw_context *to,
+u32 seqno, u32 flags)
+{
+   bool initial_switch = (flags  I915_CONTEXT_INITIAL_SWITCH) ? true : 
false;
+   bool force =  (flags  I915_CONTEXT_FORCED_SWITCH) ? true : false;
+   struct intel_ring_buffer *ring = NULL;
+   u32 hw_flags = 0;
+   int ret;
+
+   BUG_ON(to == NULL);
+   BUG_ON(from_obj != NULL  from_obj-pin_count == 0);
+   BUG_ON((from_obj != NULL  from_obj-context_id  0) || 
to-obj-context_id  0);
+
+   ret = i915_gem_object_pin(to-obj, CONTEXT_ALIGN, false);
+   if (ret)
+   return ret;
+
+   if (initial_switch)
+   hw_flags |= MI_RESTORE_INHIBIT;
+   if (force)
+   hw_flags |= MI_FORCE_RESTORE;
+
+   ring = to-ring;
+   ret = intel_ring_mi_set_context(ring, to, hw_flags);
+   if (ret) {
+   i915_gem_object_unpin(to-obj);
+   return ret;
+   }
+
+   /* The backing object for the context is done after switching to the
+* *next* context. Therefore we cannot retire the previous context until
+* the next context has already started running. In fact, the below code
+* is a bit suboptimal because the retiring can occur simply after the
+* MI_SET_CONTEXT instead of when the next seqno has completed.
+*/
+   if (from_obj != NULL) {
+   i915_gem_object_move_to_active(from_obj, ring, seqno);
+   /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
+* whole damn pipeline, we don't need to explicitly mark the
+* object dirty. It should be safe to evict the object at any
+* point after MI_SET_CONTEXT has finished executing... true as
+* of GEN7. If not from_obj-dirty=1 would make this safer.
+*/
+   BUG_ON(from_obj-ring != to-ring);
+   }
+
+   if (from_obj)
+   i915_gem_object_unpin(from_obj);
+
+   ring-last_context_obj = to-obj;
+
+   return 0;
+}
+
+/**
+ * i915_switch_context() - perform a GPU context switch.
+ * @ring: ring for which we'll execute the