Re: [PATCH v3] drm/i915/guc: Use context hints for GT frequency
On Mon, Mar 04, 2024 at 03:34:50PM -0800, Vinay Belgaumkar wrote: > Allow user to provide a low latency context hint. When set, KMD > sends a hint to GuC which results in special handling for this > context. SLPC will ramp the GT frequency aggressively every time > it switches to this context. The down freq threshold will also be > lower so GuC will ramp down the GT freq for this context more slowly. > We also disable waitboost for this context as that will interfere with > the strategy. > > We need to enable the use of SLPC Compute strategy during init, but > it will apply only to contexts that set this bit during context > creation. > > Userland can check whether this feature is supported using a new param- > I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission > enabled platforms as they use SLPC for frequency management. > > The Mesa usage model for this flag is here - > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint > > v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). > Also, use flag bits in intel_context as it allows finer control for > toggling per engine if needed (Tvrtko). > > v3: Minor review comments (Tvrtko) > > Cc: Rodrigo Vivi > Cc: Tvrtko Ursulin > Cc: Sushma Venkatesh Reddy > Acked-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi > Signed-off-by: Vinay Belgaumkar > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 -- > .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_rps.c | 4 > .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ > drivers/gpu/drm/i915/i915_getparam.c | 6 ++ > include/uapi/drm/i915_drm.h | 15 + > 10 files changed, 86 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index dcbfe32fd30c..81f65cab1330 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct > drm_i915_file_private *fpriv, > struct i915_gem_proto_context *pc, > struct drm_i915_gem_context_param *args) > { > + struct drm_i915_private *i915 = fpriv->i915; > int ret = 0; > > switch (args->param) { > @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct > drm_i915_file_private *fpriv, > pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); > break; > > + case I915_CONTEXT_PARAM_LOW_LATENCY: > + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) > + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); > + else > + ret = -EINVAL; > + break; > + > case I915_CONTEXT_PARAM_RECOVERABLE: > if (args->size) > ret = -EINVAL; > @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context > *ce, > if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) > ret = intel_context_reconfigure_sseu(ce, sseu); > > + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) > + __set_bit(CONTEXT_LOW_LATENCY, >flags); > + > return ret; > } > > @@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915, > if (vm) > ctx->vm = vm; > > + /* Assign early so intel_context_set_gem can access these flags */ > + ctx->user_flags = pc->user_flags; > + > mutex_init(>engines_mutex); > if (pc->num_user_engines >= 0) { > i915_gem_context_set_user_engines(ctx); > @@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915, >* is no remap info, it will be a NOP. */ > ctx->remap_slice = ALL_L3_SLICES(i915); > > - ctx->user_flags = pc->user_flags; > - > for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) > ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > index 03bc7f9d191b..b6d97da63d1f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > @@ -338,6 +338,7 @@ struct i915_gem_context { > #define UCONTEXT_BANNABLE2 > #define UCONTEXT_RECOVERABLE 3 > #define UCONTEXT_PERSISTENCE 4 > +#define UCONTEXT_LOW_LATENCY 5 > > /** >* @flags: small set of booleans > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h >
Re: [PATCH v3] drm/i915/guc: Use context hints for GT frequency
On Mon, Mar 4, 2024, at 3:34 PM, Vinay Belgaumkar wrote: > Allow user to provide a low latency context hint. When set, KMD > sends a hint to GuC which results in special handling for this > context. SLPC will ramp the GT frequency aggressively every time > it switches to this context. The down freq threshold will also be > lower so GuC will ramp down the GT freq for this context more slowly. > We also disable waitboost for this context as that will interfere with > the strategy. > > We need to enable the use of SLPC Compute strategy during init, but > it will apply only to contexts that set this bit during context > creation. > > Userland can check whether this feature is supported using a new param- > I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission > enabled platforms as they use SLPC for frequency management. > > The Mesa usage model for this flag is here - > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint > > v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). > Also, use flag bits in intel_context as it allows finer control for > toggling per engine if needed (Tvrtko). > > v3: Minor review comments (Tvrtko) > > Cc: Rodrigo Vivi > Cc: Tvrtko Ursulin > Cc: Sushma Venkatesh Reddy > Acked-by: Rodrigo Vivi > Signed-off-by: Vinay Belgaumkar > --- Acked-by: Ivan Briano
[PATCH v3] drm/i915/guc: Use context hints for GT frequency
Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). v3: Minor review comments (Tvrtko) Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Acked-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 -- .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 4 .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ drivers/gpu/drm/i915/i915_getparam.c | 6 ++ include/uapi/drm/i915_drm.h | 15 + 10 files changed, 86 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..81f65cab1330 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) + __set_bit(CONTEXT_LOW_LATENCY, >flags); + return ret; } @@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + /* Assign early so intel_context_set_gem can access these flags */ + ctx->user_flags = pc->user_flags; + mutex_init(>engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..ed95a7b57cbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -130,6 +130,7 @@ struct intel_context { #define CONTEXT_PERMA_PIN 11