Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-09-15 Thread Matthew Brost
On Wed, Sep 15, 2021 at 01:04:45PM -0700, John Harrison wrote:
> On 8/20/2021 15:44, Matthew Brost wrote:
> > Assign contexts in parent-child relationship consecutive guc_ids. This
> > is accomplished by partitioning guc_id space between ones that need to
> > be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
> > available guc_ids). The consecutive search is implemented via the bitmap
> > API.
> > 
> > This is a precursor to the full GuC multi-lrc implementation but aligns
> > to how GuC mutli-lrc interface is defined - guc_ids must be consecutive
> > when using the GuC multi-lrc interface.
> > 
> > v2:
> >   (Daniel Vetter)
> >- Explictly state why we assign consecutive guc_ids
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +-
> >   2 files changed, 86 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index 023953e77553..3f95b1b4f15c 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -61,9 +61,13 @@ struct intel_guc {
> >  */
> > spinlock_t lock;
> > /**
> > -* @guc_ids: used to allocate new guc_ids
> > +* @guc_ids: used to allocate new guc_ids, single-lrc
> >  */
> > struct ida guc_ids;
> > +   /**
> > +* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
> > +*/
> > +   unsigned long *guc_ids_bitmap;
> > /** @num_guc_ids: number of guc_ids that can be used */
> > u32 num_guc_ids;
> > /** @max_guc_ids: max number of guc_ids that can be used */
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 00d54bb00bfb..e9dfd43d29a0 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
> > unsigned int count);
> >   #define GUC_REQUEST_SIZE 64 /* bytes */
> > +/*
> > + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be 
> > contiguous
> > + * per the GuC submission interface. A different allocation algorithm is 
> > used
> > + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> The 'hence' clause seems to be attached to the wrong reason. The id space is
> partition because of the contiguous vs random requirements of multi vs
> single LRC, not because a different allocator is used in one partion vs the
> other.
> 

Kinda? The reason I partitioned it because to algorithms are different,
we could a unified space with a single algorithm, right? It was just
easier split the space and use 2 already existing data structures rather
cook up an algorithm in a unified space. There isn't a requirement from
the GuC that the space is partitioned, the only requirement is multi-lrc
IDs are contiguous. All this being said, I think comment is correct.

> > + * partition the guc_id space. We believe the number of multi-lrc contexts 
> > in
> > + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids 
> > for
> > + * multi-lrc.
> > + */
> > +#define NUMBER_MULTI_LRC_GUC_ID(guc) \
> > +   ((guc)->submission_state.num_guc_ids / 16 > 32 ? \
> > +(guc)->submission_state.num_guc_ids / 16 : 32)
> > +
> >   /*
> >* Below is a set of functions which control the GuC scheduling state 
> > which
> >* require a lock.
> > @@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
> > INIT_LIST_HEAD(>submission_state.destroyed_contexts);
> > intel_gt_pm_unpark_work_init(>submission_state.destroyed_worker,
> >  destroyed_worker_func);
> > +   guc->submission_state.guc_ids_bitmap =
> > +   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> > +   if (!guc->submission_state.guc_ids_bitmap)
> > +   return -ENOMEM;
> > return 0;
> >   }
> > @@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
> > guc_lrc_desc_pool_destroy(guc);
> > guc_flush_destroyed_contexts(guc);
> > i915_sched_engine_put(guc->sched_engine);
> > +   bitmap_free(guc->submission_state.guc_ids_bitmap);
> >   }
> >   static void queue_request(struct i915_sched_engine *sched_engine,
> > @@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request 
> > *rq)
> > spin_unlock_irqrestore(_engine->lock, flags);
> >   }
> > -static int new_guc_id(struct intel_guc *guc)
> > +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >   {
> > -   return ida_simple_get(>submission_state.guc_ids, 0,
> > - guc->submission_state.num_guc_ids, GFP_KERNEL |
> > -

Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-09-15 Thread John Harrison

On 8/20/2021 15:44, Matthew Brost wrote:

Assign contexts in parent-child relationship consecutive guc_ids. This
is accomplished by partitioning guc_id space between ones that need to
be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
available guc_ids). The consecutive search is implemented via the bitmap
API.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how GuC mutli-lrc interface is defined - guc_ids must be consecutive
when using the GuC multi-lrc interface.

v2:
  (Daniel Vetter)
   - Explictly state why we assign consecutive guc_ids

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +-
  2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 023953e77553..3f95b1b4f15c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -61,9 +61,13 @@ struct intel_guc {
 */
spinlock_t lock;
/**
-* @guc_ids: used to allocate new guc_ids
+* @guc_ids: used to allocate new guc_ids, single-lrc
 */
struct ida guc_ids;
+   /**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+   unsigned long *guc_ids_bitmap;
/** @num_guc_ids: number of guc_ids that can be used */
u32 num_guc_ids;
/** @max_guc_ids: max number of guc_ids that can be used */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 00d54bb00bfb..e9dfd43d29a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
  
  #define GUC_REQUEST_SIZE 64 /* bytes */
  
+/*

+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
The 'hence' clause seems to be attached to the wrong reason. The id 
space is partition because of the contiguous vs random requirements of 
multi vs single LRC, not because a different allocator is used in one 
partion vs the other.



+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID(guc) \
+   ((guc)->submission_state.num_guc_ids / 16 > 32 ? \
+(guc)->submission_state.num_guc_ids / 16 : 32)
+
  /*
   * Below is a set of functions which control the GuC scheduling state which
   * require a lock.
@@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_LIST_HEAD(>submission_state.destroyed_contexts);
intel_gt_pm_unpark_work_init(>submission_state.destroyed_worker,
 destroyed_worker_func);
+   guc->submission_state.guc_ids_bitmap =
+   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+   if (!guc->submission_state.guc_ids_bitmap)
+   return -ENOMEM;
  
  	return 0;

  }
@@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_lrc_desc_pool_destroy(guc);
guc_flush_destroyed_contexts(guc);
i915_sched_engine_put(guc->sched_engine);
+   bitmap_free(guc->submission_state.guc_ids_bitmap);
  }
  
  static void queue_request(struct i915_sched_engine *sched_engine,

@@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(_engine->lock, flags);
  }
  
-static int new_guc_id(struct intel_guc *guc)

+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
  {
-   return ida_simple_get(>submission_state.guc_ids, 0,
- guc->submission_state.num_guc_ids, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+   int ret;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
+
+   if (intel_context_is_parent(ce))
+   ret = 
bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ 
order_base_2(ce->guc_number_children
+  + 1));
+   else
+   ret = ida_simple_get(>submission_state.guc_ids,
+NUMBER_MULTI_LRC_GUC_ID(guc),
+guc->submission_state.num_guc_ids,
+

[PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-08-20 Thread Matthew Brost
Assign contexts in parent-child relationship consecutive guc_ids. This
is accomplished by partitioning guc_id space between ones that need to
be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
available guc_ids). The consecutive search is implemented via the bitmap
API.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how GuC mutli-lrc interface is defined - guc_ids must be consecutive
when using the GuC multi-lrc interface.

v2:
 (Daniel Vetter)
  - Explictly state why we assign consecutive guc_ids

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +-
 2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 023953e77553..3f95b1b4f15c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -61,9 +61,13 @@ struct intel_guc {
 */
spinlock_t lock;
/**
-* @guc_ids: used to allocate new guc_ids
+* @guc_ids: used to allocate new guc_ids, single-lrc
 */
struct ida guc_ids;
+   /**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+   unsigned long *guc_ids_bitmap;
/** @num_guc_ids: number of guc_ids that can be used */
u32 num_guc_ids;
/** @max_guc_ids: max number of guc_ids that can be used */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 00d54bb00bfb..e9dfd43d29a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID(guc) \
+   ((guc)->submission_state.num_guc_ids / 16 > 32 ? \
+(guc)->submission_state.num_guc_ids / 16 : 32)
+
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock.
@@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_LIST_HEAD(>submission_state.destroyed_contexts);
intel_gt_pm_unpark_work_init(>submission_state.destroyed_worker,
 destroyed_worker_func);
+   guc->submission_state.guc_ids_bitmap =
+   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+   if (!guc->submission_state.guc_ids_bitmap)
+   return -ENOMEM;
 
return 0;
 }
@@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_lrc_desc_pool_destroy(guc);
guc_flush_destroyed_contexts(guc);
i915_sched_engine_put(guc->sched_engine);
+   bitmap_free(guc->submission_state.guc_ids_bitmap);
 }
 
 static void queue_request(struct i915_sched_engine *sched_engine,
@@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(_engine->lock, flags);
 }
 
-static int new_guc_id(struct intel_guc *guc)
+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
-   return ida_simple_get(>submission_state.guc_ids, 0,
- guc->submission_state.num_guc_ids, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+   int ret;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
+
+   if (intel_context_is_parent(ce))
+   ret = 
bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ 
order_base_2(ce->guc_number_children
+  + 1));
+   else
+   ret = ida_simple_get(>submission_state.guc_ids,
+NUMBER_MULTI_LRC_GUC_ID(guc),
+guc->submission_state.num_guc_ids,
+GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+__GFP_NOWARN);
+   if (unlikely(ret < 0))
+   return ret;
+
+   ce->guc_id.id = ret;
+   return 0;
 }
 
 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
+