Re: [Intel-gfx] [PATCH 18/22] drm/i915/guc: Rework and simplify locking

2021-08-17 Thread Matthew Brost
On Tue, Aug 17, 2021 at 12:15:21PM +0200, Daniel Vetter wrote:
> On Mon, Aug 16, 2021 at 06:51:35AM -0700, Matthew Brost wrote:
> > Rework and simplify the locking with GuC subission. Drop
> > sched_state_no_lock and move all fields under the guc_state.sched_state
> > and protect all these fields with guc_state.lock . This requires
> > changing the locking hierarchy from guc_state.lock -> sched_engine.lock
> > to sched_engine.lock -> guc_state.lock.
> > 
> > Signed-off-by: Matthew Brost 
> 
> Yeah this is definitely going in the right direction. Especially
> sprinkling lockdep_assert_held around.
> 
> One comment below.
> 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +-
> >  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 --
> >  drivers/gpu/drm/i915/i915_trace.h |   6 +-
> >  3 files changed, 89 insertions(+), 108 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index c06171ee8792..d5d643b04d54 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -161,7 +161,7 @@ struct intel_context {
> >  * sched_state: scheduling state of this context using GuC
> >  * submission
> >  */
> > -   u16 sched_state;
> > +   u32 sched_state;
> > /*
> >  * fences: maintains of list of requests that have a submit
> >  * fence related to GuC submission
> > @@ -178,9 +178,6 @@ struct intel_context {
> > struct list_head requests;
> > } guc_active;
> >  
> > -   /* GuC scheduling state flags that do not require a lock. */
> > -   atomic_t guc_sched_state_no_lock;
> > -
> > /* GuC LRC descriptor ID */
> > u16 guc_id;
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 7aa16371908a..ba19b99173fc 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -72,86 +72,23 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
> > unsigned int count);
> >  
> >  #define GUC_REQUEST_SIZE 64 /* bytes */
> >  
> > -/*
> > - * Below is a set of functions which control the GuC scheduling state 
> > which do
> > - * not require a lock as all state transitions are mutually exclusive. 
> > i.e. It
> > - * is not possible for the context pinning code and submission, for the 
> > same
> > - * context, to be executing simultaneously. We still need an atomic as it 
> > is
> > - * possible for some of the bits to changing at the same time though.
> > - */
> > -#define SCHED_STATE_NO_LOCK_ENABLEDBIT(0)
> > -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
> > -#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
> > -static inline bool context_enabled(struct intel_context *ce)
> > -{
> > -   return (atomic_read(>guc_sched_state_no_lock) &
> > -   SCHED_STATE_NO_LOCK_ENABLED);
> > -}
> > -
> > -static inline void set_context_enabled(struct intel_context *ce)
> > -{
> > -   atomic_or(SCHED_STATE_NO_LOCK_ENABLED, >guc_sched_state_no_lock);
> > -}
> > -
> > -static inline void clr_context_enabled(struct intel_context *ce)
> > -{
> > -   atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
> > -  >guc_sched_state_no_lock);
> > -}
> > -
> > -static inline bool context_pending_enable(struct intel_context *ce)
> > -{
> > -   return (atomic_read(>guc_sched_state_no_lock) &
> > -   SCHED_STATE_NO_LOCK_PENDING_ENABLE);
> > -}
> > -
> > -static inline void set_context_pending_enable(struct intel_context *ce)
> > -{
> > -   atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
> > - >guc_sched_state_no_lock);
> > -}
> > -
> > -static inline void clr_context_pending_enable(struct intel_context *ce)
> > -{
> > -   atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
> > -  >guc_sched_state_no_lock);
> > -}
> > -
> > -static inline bool context_registered(struct intel_context *ce)
> > -{
> > -   return (atomic_read(>guc_sched_state_no_lock) &
> > -   SCHED_STATE_NO_LOCK_REGISTERED);
> > -}
> > -
> > -static inline void set_context_registered(struct intel_context *ce)
> > -{
> > -   atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
> > - >guc_sched_state_no_lock);
> > -}
> > -
> > -static inline void clr_context_registered(struct intel_context *ce)
> > -{
> > -   atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
> > -  >guc_sched_state_no_lock);
> > -}
> > -
> >  /*
> >   * Below is a set of functions which control the GuC scheduling state which
> > - * require a lock, aside from the special case where the functions are 
> > called
> > - * from guc_lrc_desc_pin(). In that case it isn't possible for any other 
> > code
> > - * path to be executing on the context.
> > + * require a 

Re: [Intel-gfx] [PATCH 18/22] drm/i915/guc: Rework and simplify locking

2021-08-17 Thread Daniel Vetter
On Mon, Aug 16, 2021 at 06:51:35AM -0700, Matthew Brost wrote:
> Rework and simplify the locking with GuC subission. Drop
> sched_state_no_lock and move all fields under the guc_state.sched_state
> and protect all these fields with guc_state.lock . This requires
> changing the locking hierarchy from guc_state.lock -> sched_engine.lock
> to sched_engine.lock -> guc_state.lock.
> 
> Signed-off-by: Matthew Brost 

Yeah this is definitely going in the right direction. Especially
sprinkling lockdep_assert_held around.

One comment below.

> ---
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 --
>  drivers/gpu/drm/i915/i915_trace.h |   6 +-
>  3 files changed, 89 insertions(+), 108 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index c06171ee8792..d5d643b04d54 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -161,7 +161,7 @@ struct intel_context {
>* sched_state: scheduling state of this context using GuC
>* submission
>*/
> - u16 sched_state;
> + u32 sched_state;
>   /*
>* fences: maintains of list of requests that have a submit
>* fence related to GuC submission
> @@ -178,9 +178,6 @@ struct intel_context {
>   struct list_head requests;
>   } guc_active;
>  
> - /* GuC scheduling state flags that do not require a lock. */
> - atomic_t guc_sched_state_no_lock;
> -
>   /* GuC LRC descriptor ID */
>   u16 guc_id;
>  
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 7aa16371908a..ba19b99173fc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -72,86 +72,23 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
> unsigned int count);
>  
>  #define GUC_REQUEST_SIZE 64 /* bytes */
>  
> -/*
> - * Below is a set of functions which control the GuC scheduling state which 
> do
> - * not require a lock as all state transitions are mutually exclusive. i.e. 
> It
> - * is not possible for the context pinning code and submission, for the same
> - * context, to be executing simultaneously. We still need an atomic as it is
> - * possible for some of the bits to changing at the same time though.
> - */
> -#define SCHED_STATE_NO_LOCK_ENABLED  BIT(0)
> -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE   BIT(1)
> -#define SCHED_STATE_NO_LOCK_REGISTERED   BIT(2)
> -static inline bool context_enabled(struct intel_context *ce)
> -{
> - return (atomic_read(>guc_sched_state_no_lock) &
> - SCHED_STATE_NO_LOCK_ENABLED);
> -}
> -
> -static inline void set_context_enabled(struct intel_context *ce)
> -{
> - atomic_or(SCHED_STATE_NO_LOCK_ENABLED, >guc_sched_state_no_lock);
> -}
> -
> -static inline void clr_context_enabled(struct intel_context *ce)
> -{
> - atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
> ->guc_sched_state_no_lock);
> -}
> -
> -static inline bool context_pending_enable(struct intel_context *ce)
> -{
> - return (atomic_read(>guc_sched_state_no_lock) &
> - SCHED_STATE_NO_LOCK_PENDING_ENABLE);
> -}
> -
> -static inline void set_context_pending_enable(struct intel_context *ce)
> -{
> - atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
> -   >guc_sched_state_no_lock);
> -}
> -
> -static inline void clr_context_pending_enable(struct intel_context *ce)
> -{
> - atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
> ->guc_sched_state_no_lock);
> -}
> -
> -static inline bool context_registered(struct intel_context *ce)
> -{
> - return (atomic_read(>guc_sched_state_no_lock) &
> - SCHED_STATE_NO_LOCK_REGISTERED);
> -}
> -
> -static inline void set_context_registered(struct intel_context *ce)
> -{
> - atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
> -   >guc_sched_state_no_lock);
> -}
> -
> -static inline void clr_context_registered(struct intel_context *ce)
> -{
> - atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
> ->guc_sched_state_no_lock);
> -}
> -
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
> - * require a lock, aside from the special case where the functions are called
> - * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
> - * path to be executing on the context.
> + * require a lock.
>   */
>  #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER  BIT(0)
>  #define SCHED_STATE_DESTROYEDBIT(1)
>  #define SCHED_STATE_PENDING_DISABLE  BIT(2)
>  #define SCHED_STATE_BANNED   BIT(3)

[Intel-gfx] [PATCH 18/22] drm/i915/guc: Rework and simplify locking

2021-08-16 Thread Matthew Brost
Rework and simplify the locking with GuC subission. Drop
sched_state_no_lock and move all fields under the guc_state.sched_state
and protect all these fields with guc_state.lock . This requires
changing the locking hierarchy from guc_state.lock -> sched_engine.lock
to sched_engine.lock -> guc_state.lock.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 --
 drivers/gpu/drm/i915/i915_trace.h |   6 +-
 3 files changed, 89 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index c06171ee8792..d5d643b04d54 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -161,7 +161,7 @@ struct intel_context {
 * sched_state: scheduling state of this context using GuC
 * submission
 */
-   u16 sched_state;
+   u32 sched_state;
/*
 * fences: maintains of list of requests that have a submit
 * fence related to GuC submission
@@ -178,9 +178,6 @@ struct intel_context {
struct list_head requests;
} guc_active;
 
-   /* GuC scheduling state flags that do not require a lock. */
-   atomic_t guc_sched_state_no_lock;
-
/* GuC LRC descriptor ID */
u16 guc_id;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 7aa16371908a..ba19b99173fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -72,86 +72,23 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
-/*
- * Below is a set of functions which control the GuC scheduling state which do
- * not require a lock as all state transitions are mutually exclusive. i.e. It
- * is not possible for the context pinning code and submission, for the same
- * context, to be executing simultaneously. We still need an atomic as it is
- * possible for some of the bits to changing at the same time though.
- */
-#define SCHED_STATE_NO_LOCK_ENABLEDBIT(0)
-#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
-#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
-static inline bool context_enabled(struct intel_context *ce)
-{
-   return (atomic_read(>guc_sched_state_no_lock) &
-   SCHED_STATE_NO_LOCK_ENABLED);
-}
-
-static inline void set_context_enabled(struct intel_context *ce)
-{
-   atomic_or(SCHED_STATE_NO_LOCK_ENABLED, >guc_sched_state_no_lock);
-}
-
-static inline void clr_context_enabled(struct intel_context *ce)
-{
-   atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
-  >guc_sched_state_no_lock);
-}
-
-static inline bool context_pending_enable(struct intel_context *ce)
-{
-   return (atomic_read(>guc_sched_state_no_lock) &
-   SCHED_STATE_NO_LOCK_PENDING_ENABLE);
-}
-
-static inline void set_context_pending_enable(struct intel_context *ce)
-{
-   atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
- >guc_sched_state_no_lock);
-}
-
-static inline void clr_context_pending_enable(struct intel_context *ce)
-{
-   atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
-  >guc_sched_state_no_lock);
-}
-
-static inline bool context_registered(struct intel_context *ce)
-{
-   return (atomic_read(>guc_sched_state_no_lock) &
-   SCHED_STATE_NO_LOCK_REGISTERED);
-}
-
-static inline void set_context_registered(struct intel_context *ce)
-{
-   atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
- >guc_sched_state_no_lock);
-}
-
-static inline void clr_context_registered(struct intel_context *ce)
-{
-   atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
-  >guc_sched_state_no_lock);
-}
-
 /*
  * Below is a set of functions which control the GuC scheduling state which
- * require a lock, aside from the special case where the functions are called
- * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
- * path to be executing on the context.
+ * require a lock.
  */
 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTERBIT(0)
 #define SCHED_STATE_DESTROYED  BIT(1)
 #define SCHED_STATE_PENDING_DISABLEBIT(2)
 #define SCHED_STATE_BANNED BIT(3)
-#define SCHED_STATE_BLOCKED_SHIFT  4
+#define SCHED_STATE_ENABLEDBIT(4)
+#define SCHED_STATE_PENDING_ENABLE BIT(5)
+#define SCHED_STATE_REGISTERED BIT(6)
+#define SCHED_STATE_BLOCKED_SHIFT  7
 #define SCHED_STATE_BLOCKEDBIT(SCHED_STATE_BLOCKED_SHIFT)