Re: [PATCH 2/2] drm/i915/guc: Extend w/a 14019159160
On 6/21/2024 5:46 PM, john.c.harri...@intel.com wrote: From: John Harrison There is a new part to an existing workaround, so enable that piece as well. v2: Extend even further. v3: Drop DG2 as there are CI failures still to resolve. Also re-order the parameters to a function to reduce excessive line wrapping. LGTM, Reviewed-by: Vinay Belgaumkar Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 18 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 37ff539a6963d..0c709e6c15be7 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -107,6 +107,7 @@ enum { enum { GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006, }; #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index f1fe5f9054538..46fabbfc775e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -815,8 +815,7 @@ guc_capture_prep_lists(struct intel_guc *guc) return PAGE_ALIGN(total_size); } -static void guc_waklv_enable_simple(struct intel_guc *guc, - u32 klv_id, u32 *offset, u32 *remain) +static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id) { u32 size; u32 klv_entry[] = { @@ -850,19 +849,20 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, - &offset, &remain); + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + guc_waklv_enable_simple(guc, &offset, &remain, + 
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE); + } /* Wa_16021333562 */ if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || IS_DG2(gt->i915))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); size = guc_ads_waklv_size(guc) - remain; if (!size)
Re: [PATCH 1/2] drm/i915/arl: Enable Wa_14019159160 for ARL
On 6/21/2024 5:46 PM, john.c.harri...@intel.com wrote: From: John Harrison The context switch out workaround also applies to ARL. Signed-off-by: John Harrison LGTM, Reviewed-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5e60a34692af8..097fc6bd1285e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -296,7 +296,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) /* Wa_16019325821 */ /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) flags |= GUC_WA_RCS_CCS_SWITCHOUT; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7995f059f30df..f1fe5f9054538 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -850,7 +850,7 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) guc_waklv_enable_simple(guc, GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, &offset, &remain);
Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq
On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote: On 27/02/2024 23:51, Vinay Belgaumkar wrote: Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). 
Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 5 + .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ drivers/gpu/drm/i915/i915_getparam.c | 12 +++ include/uapi/drm/i915_drm.h | 15 + 10 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..0799cb0b2803 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) + set_bit(CONTEXT_LOW_LATENCY, >flags); Does not need to be atomic so can use __set_bit as higher up in the function. ok. 
+ return ret; } @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + ctx->user_flags = pc->user_flags; + Given how most ctx->something assignments are at the bottom of the function I would stick a comment here saying along the lines of "assign early for intel_context_set_gem called when creating engines". ok. mutex_init(>engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..ed95a7b57cbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++
Re: [PATCH] drm/i915/guc: Add Compute context hint
On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote: On 22/02/2024 23:31, Belgaumkar, Vinay wrote: On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote: On 21/02/2024 21:28, Rodrigo Vivi wrote: On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote: On 21/02/2024 00:14, Vinay Belgaumkar wrote: Allow user to provide a context hint. When this is set, KMD will send a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of Compute strategy during SLPC init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission enabled platforms since they use SLPC for freq management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint This allows for setting it for the whole application, correct? Upsides, downsides? Are there any plans for per context? Currently there's no extension on a high level API (Vulkan/OpenGL/OpenCL/etc) that would allow the application to hint for power/freq/latency. So Mesa cannot decide when to hint. So their solution was to use .drirc and make per-application decision. I would prefer a high level extension for a more granular and informative decision. We need to work with that goal, but for now I don't see any cons on this approach. In principle yeah I doesn't harm to have the option. I am just not sure how useful this intermediate step this is with its lack of intra-process granularity. 
Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 +++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++ .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++ drivers/gpu/drm/i915/i915_getparam.c | 11 ++ include/uapi/drm/i915_drm.h | 15 + 9 files changed, 89 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..ceab7dbe9b47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_IS_COMPUTE: + if (!intel_uc_uses_guc_submission(_gt(i915)->uc)) + ret = -EINVAL; + else + pc->user_flags |= BIT(UCONTEXT_COMPUTE); + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..db86d6f6245f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_COMPUTE 5 What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for non-compute engines? Wondering if per intel_context is what we want instead. 
(Which could then be the i915_context_param_engines extension to mark individual contexts as compute strategy.) Perhaps we should rename this? This is a freq-decision-strategy inside GuC that is there mostly targeting compute workloads that needs lower latency with short burst execution. But the engine itself doesn't matter. It can be applied to any engine. I have no idea if it makes sense for other engines, such as video, and what would be pros and cons in terms of PnP. But in the case we end up allowing it on any engine, then at least userspace name shouldn't be compute. :) Yes, one of the suggestions from Daniele was to have something along the lines of UCONTEXT_HIFREQ or something along those lines so we don't confuse it with the Compute Engine.
Re: [PATCH] drm/i915/guc: Add Compute context hint
On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote: On 21/02/2024 21:28, Rodrigo Vivi wrote: On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote: On 21/02/2024 00:14, Vinay Belgaumkar wrote: Allow user to provide a context hint. When this is set, KMD will send a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of Compute strategy during SLPC init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission enabled platforms since they use SLPC for freq management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint This allows for setting it for the whole application, correct? Upsides, downsides? Are there any plans for per context? Currently there's no extension on a high level API (Vulkan/OpenGL/OpenCL/etc) that would allow the application to hint for power/freq/latency. So Mesa cannot decide when to hint. So their solution was to use .drirc and make per-application decision. I would prefer a high level extension for a more granular and informative decision. We need to work with that goal, but for now I don't see any cons on this approach. In principle yeah I doesn't harm to have the option. I am just not sure how useful this intermediate step this is with its lack of intra-process granularity. 
Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 +++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++ .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++ drivers/gpu/drm/i915/i915_getparam.c | 11 ++ include/uapi/drm/i915_drm.h | 15 + 9 files changed, 89 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..ceab7dbe9b47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_IS_COMPUTE: + if (!intel_uc_uses_guc_submission(_gt(i915)->uc)) + ret = -EINVAL; + else + pc->user_flags |= BIT(UCONTEXT_COMPUTE); + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..db86d6f6245f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_COMPUTE 5 What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for non-compute engines? Wondering if per intel_context is what we want instead. 
(Which could then be the i915_context_param_engines extension to mark individual contexts as compute strategy.) Perhaps we should rename this? This is a freq-decision-strategy inside GuC that is there mostly targeting compute workloads that needs lower latency with short burst execution. But the engine itself doesn't matter. It can be applied to any engine. I have no idea if it makes sense for other engines, such as video, and what would be pros and cons in terms of PnP. But in the case we end up allowing it on any engine, then at least userspace name shouldn't be compute. :) Yes, one of the suggestions from Daniele was to have something along the lines of UCONTEXT_HIFREQ or something along those lines so we don't confuse it with the Compute Engine. Or if we decide to call it compute and only apply to compute engines, then I would strongly
Re: [PATCH] drm/i915/mtl: Wake GT before sending H2G message
On 1/18/2024 3:50 PM, Matt Roper wrote: On Thu, Jan 18, 2024 at 03:17:28PM -0800, Vinay Belgaumkar wrote: Instead of waiting until the interrupt reaches GuC, we can grab a forcewake while triggering the H2G interrupt. GEN11_GUC_HOST_INTERRUPT is inside an "always on" domain with respect to RC6. However, there A bit of a nitpick, but technically "always on" is a description of GT register ranges that never get powered down. GEN11_GUC_HOST_INTERRUPT isn't inside the GT at all, but rather is an sgunit register and thus isn't affected by forcewake. This is just a special case where the sgunit register forwards a message back to the GT's GuC, and the workaround wants us to make sure the GT is awake before that message gets there. True, can modify the description to reflect this. could be some delays when platform is entering/exiting some higher level platform sleep states and a H2G is triggered. A forcewake ensures those sleep states have been fully exited and further processing occurs as expected. Based on this description, is adding implicit forcewake to this register really enough? Implicit forcewake powers up before a read/write, but also allows it to power back down as soon as the MMIO operation is complete. If the GuC is a bit slow to notice the interrupt, then we could wind up with a sequence like - Driver grabs forcewake and GT powers up - Driver writes 0x1901f0 to trigger GuC interrupt - Driver releases forcewake and GT powers down - GuC notices interrupt (or maybe fails to notice it because the GT powered down before it had a chance to process it?) which I'm guessing isn't actually going to satisfy this workaround. Do we actually need to keep the GT awake not just through the register operation, but also through the GuC's processing of the interrupt? If so, then we probably want to do an explicit forcewake get/put to ensure the hardware stays powered up long enough. The issue being addressed here is not GT entering C6, but the higher platform sleep states. 
Once we force wake GT while writing to the H2G register, that should bring us out of sleep. After clearing the forcewake (which would happen after the write for 0x1901f0 goes through), we still have C6 hysteresis and the hysteresis counters for the higher platform sleep states which should give GuC enough time to process the interrupt before we enter C6 and then subsequently these higher sleep states. Thanks, Vinay. Matt This will have an official WA soon so adding a FIXME in the comments. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/intel_uncore.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index dfefad5a5fec..121458a31886 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1800,7 +1800,10 @@ static const struct intel_forcewake_range __mtl_fw_ranges[] = { GEN_FW_RANGE(0x24000, 0x2ffff, 0), /* 0x24000 - 0x2407f: always on 0x24080 - 0x2ffff: reserved */ - GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT) + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), + GEN_FW_RANGE(0x40000, 0x1901ec, 0), + GEN_FW_RANGE(0x1901f0, 0x1901f0, FORCEWAKE_GT) + /* FIXME: WA to wake GT while triggering H2G */ }; /* -- 2.38.1
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: Enable Wa_16019325821
On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote: From: John Harrison Some platforms require holding RCS context switches until CCS is idle (the reverse w/a of Wa_14014475959). Some platforms require both versions. Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 19 +++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 3 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 ++- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 86a04afff64b3..9cccd60a5c41d 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -743,21 +743,23 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) } /* Wa_14014475959:dg2 */ -#define CCS_SEMAPHORE_PPHWSP_OFFSET0x540 -static u32 ccs_semaphore_offset(struct i915_request *rq) +/* Wa_16019325821 */ +#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540 +static u32 hold_switchout_semaphore_offset(struct i915_request *rq) { return i915_ggtt_offset(rq->context->state) + - (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET; + (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET; } /* Wa_14014475959:dg2 */ -static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) +/* Wa_16019325821 */ +static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs) { int i; *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL | MI_ATOMIC_MOVE; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; *cs++ = 1; @@ -773,7 +775,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD; *cs++ = 0; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; return cs; @@ -790,8 +792,9 
@@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) cs = gen12_emit_preempt_busywait(rq, cs); /* Wa_14014475959:dg2 */ - if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine)) - cs = ccs_emit_wa_busywait(rq, cs); + /* Wa_16019325821 */ + if (intel_engine_uses_wa_hold_switchout(rq->engine)) + cs = hold_switchout_emit_wa_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8769760257fd9..f08739d020332 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -584,7 +584,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) #define I915_ENGINE_HAS_EU_PRIORITYBIT(10) #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11) -#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12) +#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12) unsigned int flags; /* @@ -694,10 +694,11 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) } /* Wa_14014475959:dg2 */ +/* Wa_16019325821 */ static inline bool -intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) +intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT; } #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 3f3df1166b860..0e6c160de3315 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -294,6 +294,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + /* Wa_16019325821 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + flags |= GUC_WA_RCS_CCS_SWITCHOUT; + /* * Wa_14012197797 * Wa_22011391025 diff 
--git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 8ae1846431da7..48863188a130e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -96,8 +96,9 @@ #define GUC_WA_GAM_CREDITS BIT(10) #define GUC_WA_DUAL_QUEUE BIT(11) #define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) -#define GUC_WA_CONTEXT_ISOLATION BIT(15) #define GUC_WA_PRE_PARSER BIT(14) +#define
Re: [Intel-gfx] [PATCH v2 3/4] drm/i915/guc: Enable Wa_14019159160
On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote: From: John Harrison Use the new w/a KLV support to enable a MTL w/a. Note, this w/a is a super-set of Wa_16019325821, so requires turning that one as well as setting the new flag for Wa_14019159160 itself. Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 3 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 7 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 34 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 1 + 6 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 9cccd60a5c41d..359b21fb02ab2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -744,6 +744,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) /* Wa_14014475959:dg2 */ /* Wa_16019325821 */ +/* Wa_14019159160 */ #define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET0x540 static u32 hold_switchout_semaphore_offset(struct i915_request *rq) { @@ -753,6 +754,7 @@ static u32 hold_switchout_semaphore_offset(struct i915_request *rq) /* Wa_14014475959:dg2 */ /* Wa_16019325821 */ +/* Wa_14019159160 */ static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs) { int i; @@ -793,6 +795,7 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) /* Wa_14014475959:dg2 */ /* Wa_16019325821 */ + /* Wa_14019159160 */ if (intel_engine_uses_wa_hold_switchout(rq->engine)) cs = hold_switchout_emit_wa_busywait(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f08739d020332..3b4993955a4b6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -695,6 +695,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) 
/* Wa_14014475959:dg2 */ /* Wa_16019325821 */ +/* Wa_14019159160 */ static inline bool intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 58012edd4eb0e..bebf28e3c4794 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -101,4 +101,11 @@ enum { GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, }; +/* + * Workaround keys: + */ +enum { + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, +}; + #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 0e6c160de3315..6252f32d67011 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -295,6 +295,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* Wa_16019325821 */ + /* Wa_14019159160 */ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) flags |= GUC_WA_RCS_CCS_SWITCHOUT; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 251e7a7a05cb8..8f7298cbbc322 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -810,6 +810,25 @@ guc_capture_prep_lists(struct intel_guc *guc) return PAGE_ALIGN(total_size); } +/* Wa_14019159160 */ +static u32 guc_waklv_ra_mode(struct intel_guc *guc, u32 offset, u32 remain) +{ + u32 size; + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE) | + FIELD_PREP(GUC_KLV_0_LEN, 0), + /* 0 dwords data */ + }; + + size = sizeof(klv_entry); + GEM_BUG_ON(remain < size); + + iosys_map_memcpy_to(&guc->ads_map, offset, klv_entry, size); + + return size; +} + static void guc_waklv_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -825,15 +844,12 @@ static void guc_waklv_init(struct 
intel_guc *guc) offset = guc_ads_waklv_offset(guc); remain = guc_ads_waklv_size(guc); - /* -* Add workarounds here: -* -* if (want_wa_<name>) { -* size = guc_waklv_<name>(guc, offset, remain); -* offset += size; -* remain -= size; -* } -*/ + /* Wa_14019159160 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + size = guc_waklv_ra_mode(guc, offset, remain); + offset += size; + remain -= size; + } size =
Re: [Intel-gfx] [PATCH v2 2/4] drm/i915/guc: Add support for w/a KLVs
On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote: From: John Harrison To prevent running out of bits, new w/a enable flags are being added via a KLV system instead of a 32 bit flags word. Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 73 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 5 +- 5 files changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index dabeaf4f245f3..00d6402333f8e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -36,6 +36,7 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR= 0x75, INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, INTEL_GUC_LOAD_STATUS_READY= 0xF0, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b6dfe62c8f2a..4113776ff3e19 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -198,6 +198,8 @@ struct intel_guc { struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_waklv_size: size of workaround KLVs */ + u32 ads_waklv_size; /** @ads_capture_size: size of register lists in the ADS used for error capture */ u32 ads_capture_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 63724e17829a7..251e7a7a05cb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -46,6 +46,10 @@ * +---+ * | padding | * +---+ <== 4K aligned + * | w/a KLVs | + * +---+ + * | padding | + * +---+ <== 4K aligned * | capture lists | * +---+ * | padding | @@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_waklv_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_waklv_size); +} + static u32 guc_ads_capture_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->ads_capture_size); @@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_capture_offset(struct intel_guc *guc) +static u32 guc_ads_waklv_offset(struct intel_guc *guc) { u32 offset; @@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_capture_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_waklv_offset(guc) + +guc_ads_waklv_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { u32 offset; @@ -791,6 +810,49 @@ guc_capture_prep_lists(struct intel_guc *guc) return PAGE_ALIGN(total_size); } +static void guc_waklv_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 offset, addr_ggtt, remain, size; + + if (!intel_uc_uses_guc_submission(>uc)) + return; + + if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0)) + return; + + GEM_BUG_ON(iosys_map_is_null(>ads_map)); + offset = guc_ads_waklv_offset(guc); + remain = guc_ads_waklv_size(guc); + + /* +* Add workarounds here: +* +* if (want_wa_) { +* size = guc_waklv_(guc, offset, remain); +* offset += size; +* remain -= size; +* } +*/ + + size = guc_ads_waklv_size(guc) - remain; + if (!size) + return; + + offset = guc_ads_waklv_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + + ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt); + 
ads_blob_write(guc, ads.wa_klv_addr_hi, 0); + ads_blob_write(guc,
Re: [Intel-gfx] [PATCH] drm/i915: Read a shadowed mmio register for ggtt flush
On 11/9/2023 12:35 PM, Ville Syrjälä wrote: On Thu, Nov 09, 2023 at 12:01:26PM -0800, Belgaumkar, Vinay wrote: On 11/9/2023 11:30 AM, Ville Syrjälä wrote: On Thu, Nov 09, 2023 at 11:21:48AM -0800, Vinay Belgaumkar wrote: We read RENDER_HEAD as a part of the flush. If GT is in deeper sleep states, this could lead to read errors since we are not using a forcewake. Safer to read a shadowed register instead. IIRC shadowing is only a thing for writes, not reads. Sure, but reading from a shadowed register does return the cached value Does it? I suppose that would make some sense, but I don't recall that ever being stated anywhere. At least before the shadow registers existed reads would just give you zeroes when not awake. (even though we don't care about the value here). When GT is in deeper sleep states, it is better to read a shadowed (cached) value instead of trying to attempt an mmio register read without a force wake anyways. So you're saying reads from non-shadowed registers fail somehow when not awake? How exactly do they fail? And when reading from a shadowed register that failure never happens? We could hit problems like the one being addressed here - https://patchwork.freedesktop.org/series/125356/. Reading from a shadowed register will avoid any needless references (without a wake) to the MMIO space. Shouldn't hurt to make this change for all gens IMO. Thanks, Vinay. Thanks, Vinay. 
Cc: John Harrison Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ed32bf5b1546..ea814ea5f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) spin_lock_irqsave(>lock, flags); intel_uncore_posting_read_fw(uncore, -RING_HEAD(RENDER_RING_BASE)); +RING_TAIL(RENDER_RING_BASE)); spin_unlock_irqrestore(>lock, flags); } } -- 2.38.1
Re: [Intel-gfx] [PATCH] drm/i915: Read a shadowed mmio register for ggtt flush
On 11/9/2023 11:30 AM, Ville Syrjälä wrote: On Thu, Nov 09, 2023 at 11:21:48AM -0800, Vinay Belgaumkar wrote: We read RENDER_HEAD as a part of the flush. If GT is in deeper sleep states, this could lead to read errors since we are not using a forcewake. Safer to read a shadowed register instead. IIRC shadowing is only a thing for writes, not reads. Sure, but reading from a shadowed register does return the cached value (even though we don't care about the value here). When GT is in deeper sleep states, it is better to read a shadowed (cached) value instead of trying to attempt an mmio register read without a force wake anyways. Thanks, Vinay. Cc: John Harrison Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ed32bf5b1546..ea814ea5f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) spin_lock_irqsave(>lock, flags); intel_uncore_posting_read_fw(uncore, -RING_HEAD(RENDER_RING_BASE)); +RING_TAIL(RENDER_RING_BASE)); spin_unlock_irqrestore(>lock, flags); } } -- 2.38.1
Re: [Intel-gfx] [PATCH] drm/i915/mtl: Don't set PIPE_CONTROL_FLUSH_L3
On 10/16/2023 4:24 PM, John Harrison wrote: On 10/16/2023 15:55, Vinay Belgaumkar wrote: This bit does not cause an explicit L3 flush. We already use At all? Or only on newer hardware? And as a genuine spec change or as a bug / workaround? If the hardware has re-purposed the bit then it is probably worth at least adding a comment to the bit definition to say that it is only valid up to IP version 12.70. At this point, this is a bug on MTL since this bit is not related to L3 flushes as per spec. Regarding older platforms, still checking the reason why this was added (i.e if it fixed something and will regress if removed). If not, we can extend the change for others as well in a separate patch. On older platforms, this bit seems to cause an implicit flush at best. PIPE_CONTROL_DC_FLUSH_ENABLE for that purpose. Cc: Nirmoy Das Cc: Mikka Kuoppala Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index ba4c2422b340..abbc02f3e66e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -247,6 +247,7 @@ static int mtl_dummy_pipe_control(struct i915_request *rq) int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine; + struct intel_gt *gt = rq->engine->gt; /* * On Aux CCS platforms the invalidation of the Aux @@ -278,7 +279,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) * deals with Protected Memory which is not needed for * AUX CCS invalidation and lead to unwanted side effects. */ - if (mode & EMIT_FLUSH) + if ((mode & EMIT_FLUSH) && + !(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71 Why stop at 12.71? Is the meaning only changed for 12.70 and the old/correct version will be restored in later hardware? 
Was trying to keep this limited to MTL for now until the above statements are verified. Thanks, Vinay. John. bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; @@ -812,12 +814,14 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); + if (!(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71 + flags |= PIPE_CONTROL_FLUSH_L3; + /* Wa_14016712196 */ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */
Re: [Intel-gfx] [PATCH 3/4] drm/i915/guc: Add support for w/a KLVs
On 9/15/2023 2:55 PM, john.c.harri...@intel.com wrote: From: John Harrison To prevent running out of bits, new w/a enable flags are being added via a KLV system instead of a 32 bit flags word. Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h| 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 64 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 5 +- 5 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index dabeaf4f245f3..00d6402333f8e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -36,6 +36,7 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR= 0x75, INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, INTEL_GUC_LOAD_STATUS_READY= 0xF0, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 6c392bad29c19..3b1fc5f96306b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -186,6 +186,8 @@ struct intel_guc { struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_waklv_size: size of workaround KLVs */ + u32 ads_waklv_size; /** @ads_capture_size: size of register lists in the ADS used for error capture */ u32 ads_capture_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ @@ -295,6 +297,7 @@ struct intel_guc { #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) #define GUC_SUBMIT_VER(guc) 
MAKE_GUC_VER_STRUCT((guc)->submission_version) +#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver) static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 63724e17829a7..792910af3a481 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -46,6 +46,10 @@ * +---+ * | padding | * +---+ <== 4K aligned + * | w/a KLVs | + * +---+ + * | padding | + * +---+ <== 4K aligned * | capture lists | * +---+ * | padding | @@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_waklv_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_waklv_size); +} + static u32 guc_ads_capture_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->ads_capture_size); @@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_capture_offset(struct intel_guc *guc) +static u32 guc_ads_waklv_offset(struct intel_guc *guc) { u32 offset; @@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_capture_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_waklv_offset(guc) + +guc_ads_waklv_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { u32 offset; @@ -791,6 +810,40 @@ guc_capture_prep_lists(struct intel_guc *guc) return PAGE_ALIGN(total_size); } +static void guc_waklv_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 offset, addr_ggtt, remain, size; + + if (!intel_uc_uses_guc_submission(>uc)) + return; + + if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0)) + return; should this be <= ? 
+ + GEM_BUG_ON(iosys_map_is_null(>ads_map)); + offset = guc_ads_waklv_offset(guc); + remain = guc_ads_waklv_size(guc); + + /* Add workarounds here */ + extra blank line? + size = guc_ads_waklv_size(guc) - remain;
Re: [Intel-gfx] [PATCH 2/4] drm/i915: Enable Wa_16019325821
On 9/15/2023 2:55 PM, john.c.harri...@intel.com wrote: From: John Harrison Some platforms require holding RCS context switches until CCS is idle (the reverse w/a of Wa_14014475959). Some platforms require both versions. Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 19 +++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 3 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +++- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 0143445dba830..8b494825c55f2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -733,21 +733,23 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) } /* Wa_14014475959:dg2 */ -#define CCS_SEMAPHORE_PPHWSP_OFFSET0x540 -static u32 ccs_semaphore_offset(struct i915_request *rq) +/* Wa_16019325821 */ +#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540 +static u32 hold_switchout_semaphore_offset(struct i915_request *rq) { return i915_ggtt_offset(rq->context->state) + - (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET; + (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET; } /* Wa_14014475959:dg2 */ -static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) +/* Wa_16019325821 */ +static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs) { int i; *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL | MI_ATOMIC_MOVE; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; *cs++ = 1; @@ -763,7 +765,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD; *cs++ = 0; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; return cs; @@ -780,8 +782,9 
@@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) cs = gen12_emit_preempt_busywait(rq, cs); /* Wa_14014475959:dg2 */ - if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine)) - cs = ccs_emit_wa_busywait(rq, cs); + /* Wa_16019325821 */ + if (intel_engine_uses_wa_hold_switchout(rq->engine)) + cs = hold_switchout_emit_wa_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a7e6775980043..68fe1cef9cd94 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -573,7 +573,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) #define I915_ENGINE_HAS_EU_PRIORITYBIT(10) #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11) -#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12) +#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12) unsigned int flags; /* @@ -683,10 +683,11 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) } /* Wa_14014475959:dg2 */ +/* Wa_16019325821 */ static inline bool -intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) +intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT; } #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 27df41c53b890..4001679ba0793 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -294,6 +294,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + /* Wa_16019325821 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + flags |= GUC_WA_RCS_CCS_SWITCHOUT; + /* * Wa_14012197797 * Wa_22011391025 diff 
--git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index b4d56eccfb1f0..f97af0168a66b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -95,8 +95,9 @@ #define GUC_WA_GAM_CREDITS BIT(10) #define GUC_WA_DUAL_QUEUE BIT(11) #define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) -#define GUC_WA_CONTEXT_ISOLATION BIT(15) #define GUC_WA_PRE_PARSER BIT(14) +#define
Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Enable WA 14018913170
On 9/14/2023 3:28 PM, john.c.harri...@intel.com wrote: From: Daniele Ceraolo Spurio The GuC handles the WA, the KMD just needs to set the flag to enable it on the appropriate platforms. Signed-off-by: John Harrison Signed-off-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 27df41c53b890..3f3df1166b860 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -319,6 +319,12 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) if (!RCS_MASK(gt)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + /* Wa_14018913170 */ + if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) { + if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || IS_PONTEVECCHIO(gt->i915)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + } + return flags; LGTM, Reviewed-by: Vinay Belgaumkar } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 6c392bad29c19..818c8c146fd47 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -295,6 +295,7 @@ struct intel_guc { #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) #define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->submission_version) +#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver) static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index b4d56eccfb1f0..123ad75d2eb28 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -100,6 +100,7 @@ #define 
GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) #define GUC_WA_POLLCS BIT(18) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LISTBIT(21) +#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2)
Re: [Intel-gfx] [PATCH] drm/i915/gem: Allow users to disable waitboost
On 9/21/2023 3:41 AM, Tvrtko Ursulin wrote: On 20/09/2023 22:56, Vinay Belgaumkar wrote: Provide a bit to disable waitboost while waiting on a gem object. Waitboost results in increased power consumption by requesting RP0 while waiting for the request to complete. Add a bit in the gem_wait() IOCTL where this can be disabled. This is related to the libva API change here - Link: https://github.com/XinfengZhang/libva/commit/3d90d18c67609a73121bb71b20ee4776b54b61a7 This link does not appear to lead to userspace code using this uapi? We have asked Carl (cc'd) to post a patch for the same. Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 9 ++--- drivers/gpu/drm/i915/i915_request.c | 3 ++- drivers/gpu/drm/i915/i915_request.h | 1 + include/uapi/drm/i915_drm.h | 1 + 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index d4b918fb11ce..955885ec859d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -72,7 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; - i915_gem_object_boost(resv, flags); + if (!(flags & I915_WAITBOOST_DISABLE)) + i915_gem_object_boost(resv, flags); dma_resv_iter_begin(, resv, dma_resv_usage_rw(flags & I915_WAIT_ALL)); @@ -236,7 +237,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ktime_t start; long ret; - if (args->flags != 0) + if (args->flags != 0 || args->flags != I915_GEM_WAITBOOST_DISABLE) return -EINVAL; obj = i915_gem_object_lookup(file, args->bo_handle); @@ -248,7 +249,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_PRIORITY | - I915_WAIT_ALL, + I915_WAIT_ALL | + (args->flags & I915_GEM_WAITBOOST_DISABLE ? 
+ I915_WAITBOOST_DISABLE : 0), to_wait_timeout(args->timeout_ns)); if (args->timeout_ns > 0) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f59081066a19..2957409b4b2a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -2044,7 +2044,8 @@ long i915_request_wait_timeout(struct i915_request *rq, * but at a cost of spending more power processing the workload * (bad for battery). */ - if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) + if (!(flags & I915_WAITBOOST_DISABLE) && (flags & I915_WAIT_PRIORITY) && + !i915_request_started(rq)) intel_rps_boost(rq); wait.tsk = current; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 0ac55b2e4223..3cc00e8254dc 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -445,6 +445,7 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_INTERRUPTIBLE BIT(0) #define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ #define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ +#define I915_WAITBOOST_DISABLE BIT(3) /* used by i915_gem_object_wait() */ void i915_request_show(struct drm_printer *m, const struct i915_request *rq, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7000e5910a1d..4adee70e39cf 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1928,6 +1928,7 @@ struct drm_i915_gem_wait { /** Handle of BO we shall wait on */ __u32 bo_handle; __u32 flags; +#define I915_GEM_WAITBOOST_DISABLE (1u<<0) Probably would be good to avoid mentioning waitboost in the uapi since so far it wasn't an explicit feature/contract. Something like I915_GEM_WAIT_BACKGROUND_PRIORITY? Low priority? sure. I also wonder if there could be a possible angle to help Rob (+cc) upstream the syncobj/fence deadline code if our media driver might make use of that somehow. 
Like if either we could wire up the deadline into GEM_WAIT (in a backward compatible manner), or if media could use sync fd wait instead. Assuming they have an out fence already, which may not be true. Makes sense. We could add a SET_DEADLINE flag or something similar and pass in the deadline when appropriate. Thanks, Vinay. Regards, Tvrtko /** Number of nanoseconds to wait, Returns time remaining. */ __s64 timeout_ns; };
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier
On 7/21/2023 3:08 PM, Belgaumkar, Vinay wrote: On 7/21/2023 2:23 PM, Rodrigo Vivi wrote: On Fri, Jul 21, 2023 at 01:44:34PM -0700, Belgaumkar, Vinay wrote: On 7/21/2023 1:41 PM, Rodrigo Vivi wrote: On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote: This should be done before the soft min/max frequencies are restored. When we disable the "Ignore efficient frequency" flag, GuC does not actually bring the requested freq down to RPn. Specifically, this scenario- - ignore efficient freq set to true - reduce min to RPn (from efficient) - suspend - resume (includes GuC load, restore soft min/max, restore efficient freq) - validate min freq has been restored to RPn This will fail if we didn't first restore(disable, in this case) efficient freq flag before setting the soft min frequency. that's strange. so guc is returning the rpe when we request the min freq during the soft config? we could alternatively change the soft config to actually get the min and not be tricked by this. But also the patch below doesn't hurt. Reviewed-by: Rodrigo Vivi (Although I'm still curious and want to understand exactly why the soft min gets messed up when we don't tell guc to ignore the efficient freq beforehand. Please help me to understand.) The soft min does not get messed up, but GuC keeps requesting RPe even after disabling efficient freq. (unless we manually set min freq to RPn AFTER disabling efficient). so it looks to me that the right solution would be to ensure that every time that we disable the efficient freq we make sure to also set the min freq to RPn, no?! Hmm, may not be applicable every time. What if someone disables efficient frequency while running a workload or with frequency fixed to 800, for example? I'll take that back, it should not matter. GuC will not change its request just because we switched min lower. I will resend the patch with the min setting as well. Thanks, Vinay. Thanks, Vinay. Thanks, Vinay. 
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736 Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ee9f83af7cf6..f16dff7c3185 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc)); + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + slpc_get_rp_values(slpc); /* Handle the case where min=max=RPmax */ @@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); - /* Set cached value of ignore efficient freq */ - intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); - return 0; } -- 2.38.1
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier
On 7/21/2023 2:23 PM, Rodrigo Vivi wrote: On Fri, Jul 21, 2023 at 01:44:34PM -0700, Belgaumkar, Vinay wrote: On 7/21/2023 1:41 PM, Rodrigo Vivi wrote: On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote: This should be done before the soft min/max frequencies are restored. When we disable the "Ignore efficient frequency" flag, GuC does not actually bring the requested freq down to RPn. Specifically, this scenario- - ignore efficient freq set to true - reduce min to RPn (from efficient) - suspend - resume (includes GuC load, restore soft min/max, restore efficient freq) - validate min freq has been restored to RPn This will fail if we didn't first restore(disable, in this case) efficient freq flag before setting the soft min frequency. that's strange. so guc is returning the rpe when we request the min freq during the soft config? we could alternatively change the soft config to actually get the min and not be tricked by this. But also the patch below doesn't hurt. Reviewed-by: Rodrigo Vivi (Although I'm still curious and want to understand exactly why the soft min gets messed up when we don't tell guc to ignore the efficient freq beforehand. Please help me to understand.) The soft min does not get messed up, but GuC keeps requesting RPe even after disabling efficient freq. (unless we manually set min freq to RPn AFTER disabling efficient). so it looks to me that the right solution would be to ensure that every time that we disable the efficient freq we make sure to also set the min freq to RPn, no?! Hmm, may not be applicable every time. What if someone disables efficient frequency while running a workload or with frequency fixed to 800, for example? Thanks, Vinay. Thanks, Vinay. 
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736 Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ee9f83af7cf6..f16dff7c3185 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc)); + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + slpc_get_rp_values(slpc); /* Handle the case where min=max=RPmax */ @@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); - /* Set cached value of ignore efficient freq */ - intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); - return 0; } -- 2.38.1
Re: [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier
On 7/21/2023 1:41 PM, Rodrigo Vivi wrote: On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote: This should be done before the soft min/max frequencies are restored. When we disable the "Ignore efficient frequency" flag, GuC does not actually bring the requested freq down to RPn. Specifically, this scenario- - ignore efficient freq set to true - reduce min to RPn (from efficient) - suspend - resume (includes GuC load, restore soft min/max, restore efficient freq) - validate min freq has been restored to RPn This will fail if we didn't first restore(disable, in this case) efficient freq flag before setting the soft min frequency. that's strange. so guc is returning the rpe when we request the min freq during the soft config? we could alternatively change the soft config to actually get the min and not be tricked by this. But also the patch below doesn't hurt. Reviewed-by: Rodrigo Vivi (Although I'm still curious and want to understand exactly why the soft min gets messed up when we don't tell guc to ignore the efficient freq beforehand. Please help me to understand.) The soft min does not get messed up, but GuC keeps requesting RPe even after disabling efficient freq. (unless we manually set min freq to RPn AFTER disabling efficient). Thanks, Vinay. 
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736 Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ee9f83af7cf6..f16dff7c3185 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc)); + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + slpc_get_rp_values(slpc); /* Handle the case where min=max=RPmax */ @@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); - /* Set cached value of ignore efficient freq */ - intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); - return 0; } -- 2.38.1
Re: [Intel-gfx] [PATCH] drm/i915/guc: Dump perf_limit_reasons for debug
On 6/26/2023 11:43 PM, Dixit, Ashutosh wrote: On Mon, 26 Jun 2023 21:02:14 -0700, Belgaumkar, Vinay wrote: On 6/26/2023 8:17 PM, Dixit, Ashutosh wrote: On Mon, 26 Jun 2023 19:12:18 -0700, Vinay Belgaumkar wrote: GuC load takes longer sometimes due to GT frequency not ramping up. Add perf_limit_reasons to the existing warn print to see if frequency is being throttled. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 364d0d546ec8..73911536a8e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -254,6 +254,8 @@ static int guc_wait_ucode(struct intel_guc *guc) guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", delta_ms, intel_rps_read_actual_frequency(>gt->rps), before_freq, status, count, ret); + guc_warn(guc, "perf limit reasons = 0x%08X\n", +intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); Maybe just add at the end of the previous guc_warn? Its already too long a line. If I try adding on the next line checkpatch complains about splitting double quotes. In these cases of long quoted lines we generally ignore checkpatch. Because perf limit reasons is part of the "excessive init time" message it should be on the same line within the square brackets. So should not be splitting double quotes. Another idea would be something like this: guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X]\n", delta_ms, intel_rps_read_actual_frequency(>gt->rps), before_freq, status); guc_warn(guc, "excessive init time: [count = %d, ret = %d, perf limit reasons = 0x%08X]\n", count, ret, intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); ok, I will split it based on freq and non-freq based debug. Thanks, Vinay. Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH] drm/i915/guc: Dump perf_limit_reasons for debug
On 6/26/2023 8:17 PM, Dixit, Ashutosh wrote: On Mon, 26 Jun 2023 19:12:18 -0700, Vinay Belgaumkar wrote: GuC load takes longer sometimes due to GT frequency not ramping up. Add perf_limit_reasons to the existing warn print to see if frequency is being throttled. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 364d0d546ec8..73911536a8e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -254,6 +254,8 @@ static int guc_wait_ucode(struct intel_guc *guc) guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", delta_ms, intel_rps_read_actual_frequency(>gt->rps), before_freq, status, count, ret); + guc_warn(guc, "perf limit reasons = 0x%08X\n", +intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); Maybe just add at the end of the previous guc_warn? Its already too long a line. If I try adding on the next line checkpatch complains about splitting double quotes. Thanks, Vinay. } else { guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", delta_ms, intel_rps_read_actual_frequency(>gt->rps), -- 2.38.1
Re: [PATCH] drm/i915/guc/slpc: Apply min softlimit correctly
On 6/13/2023 7:25 PM, Dixit, Ashutosh wrote: On Fri, 09 Jun 2023 15:02:52 -0700, Vinay Belgaumkar wrote: Hi Vinay, We were skipping when min_softlimit was equal to RPn. We need to apply it rergardless as efficient frequency will push the SLPC min to RPe. regardless This will break scenarios where user sets a min softlimit < RPe before reset and then performs a GT reset. Can you explain the reason for the patch clearly in terms of variables in the code, what variable has what value and what is the bug. I am not following from the above description. Hi Ashutosh, Scenario being fixed here is exactly the one in i915_pm_freq_api reset/suspend subtests (currently in review). Test sets min freq to RPn and then performs a reset. It then checks if cur_freq is RPn. Here's the sequence that shows the problem- RPLS:/home/gta# modprobe i915 RPLS:/home/gta# echo 1 > /sys/class/drm/card0/gt/gt0/slpc_ignore_eff_freq RPLS:/home/gta# echo 300 > /sys/class/drm/card0/gt_min_freq_mhz (RPn) RPLS:/home/gta# cat /sys/class/drm/card0/gt_cur_freq_mhz --> cur == RPn as expected 300 RPLS:/home/gta# echo 1 > /sys/kernel/debug/dri/0/gt0/reset --> reset RPLS:/home/gta# cat /sys/class/drm/card0/gt_min_freq_mhz --> shows the internal cached variable correctly 300 RPLS:/home/gta# cat /sys/class/drm/card0/gt_cur_freq_mhz --> actual freq being requested by SLPC (it's not RPn!!) 700 We need to sync up driver min freq value and SLPC min after a reset/suspend. Currently, we skip if the user had manually set min to RPn (this was an optimization we had before we enabled efficient freq usage). Thanks, Vinay. Thanks. 
-- Ashutosh Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient frequency") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 01b75529311c..ee9f83af7cf6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -606,7 +606,7 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) if (unlikely(ret)) return ret; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; - } else if (slpc->min_freq_softlimit != slpc->min_freq) { + } else { return intel_guc_slpc_set_min_freq(slpc, slpc->min_freq_softlimit); } -- 2.38.1
Re: [Intel-gfx] [PATCH] drm/i915/pxp/mtl: intel_pxp_init_hw needs runtime-pm inside pm-complete
On 6/1/2023 8:59 AM, Alan Previn wrote: In the case of failed suspend flow or cases where the kernel does not go into full suspend but goes from suspend_prepare back to resume_complete, we get called for a pm_complete but without runtime_pm guaranteed. Thus, ensure we take the runtime_pm when calling intel_pxp_init_hw from within intel_pxp_resume_complete. LGTM, Reviewed-by: Vinay Belgaumkar Signed-off-by: Alan Previn --- drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 1a04067f61fc..1d184dcd63c7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -36,6 +36,8 @@ void intel_pxp_suspend(struct intel_pxp *pxp) void intel_pxp_resume_complete(struct intel_pxp *pxp) { + intel_wakeref_t wakeref; + if (!intel_pxp_is_enabled(pxp)) return; @@ -48,7 +50,8 @@ void intel_pxp_resume_complete(struct intel_pxp *pxp) if (!HAS_ENGINE(pxp->ctrl_gt, GSC0) && !pxp->pxp_component) return; - intel_pxp_init_hw(pxp); + with_intel_runtime_pm(>ctrl_gt->i915->runtime_pm, wakeref) + intel_pxp_init_hw(pxp); } void intel_pxp_runtime_suspend(struct intel_pxp *pxp) base-commit: a66da4c33d8ede541aea9ba6d0d73b556a072d54
Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error
On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote: From: John Harrison In the past, There have been sporadic CTB failures which proved hard to reproduce manually. The most effective solution was to dump the GuC log at the point of failure and let the CI system do the repro. It is preferable not to dump the GuC log via dmesg for all issues as it is not always necessary and is not helpful for end users. But rather than trying to re-invent the code to do this each time it is wanted, commit the code but for DEBUG_GUC builds only. v2: Use IS_ENABLED for testing config options. LGTM, Reviewed-by: Vinay Belgaumkar Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 53 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 +++ 2 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 1803a633ed648..dc5cd712f1ff5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -13,6 +13,30 @@ #include "intel_guc_ct.h" #include "intel_guc_print.h" +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +enum { + CT_DEAD_ALIVE = 0, + CT_DEAD_SETUP, + CT_DEAD_WRITE, + CT_DEAD_DEADLOCK, + CT_DEAD_H2G_HAS_ROOM, + CT_DEAD_READ, + CT_DEAD_PROCESS_FAILED, +}; + +static void ct_dead_ct_worker_func(struct work_struct *w); + +#define CT_DEAD(ct, reason)\ + do { \ + if (!(ct)->dead_ct_reported) { \ + (ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \ + queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \ + } \ + } while (0) +#else +#define CT_DEAD(ct, reason)do { } while (0) +#endif + static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) { return container_of(ct, struct intel_guc, ct); @@ -93,6 +117,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) spin_lock_init(>requests.lock); INIT_LIST_HEAD(>requests.pending); INIT_LIST_HEAD(>requests.incoming); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) + INIT_WORK(>dead_ct_worker, 
ct_dead_ct_worker_func); +#endif INIT_WORK(>requests.worker, ct_incoming_request_worker_func); tasklet_setup(>receive_tasklet, ct_receive_tasklet_func); init_waitqueue_head(>wq); @@ -319,11 +346,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) ct->enabled = true; ct->stall_time = KTIME_MAX; +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) + ct->dead_ct_reported = false; + ct->dead_ct_reason = CT_DEAD_ALIVE; +#endif return 0; err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); + CT_DEAD(ct, SETUP); return err; } @@ -434,6 +466,7 @@ static int ct_write(struct intel_guc_ct *ct, corrupted: CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", desc->head, desc->tail, desc->status); + CT_DEAD(ct, WRITE); ctb->broken = true; return -EPIPE; } @@ -504,6 +537,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head); CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail); + CT_DEAD(ct, DEADLOCK); ct->ctbs.send.broken = true; } @@ -552,6 +586,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) head, ctb->size); desc->status |= GUC_CTB_STATUS_OVERFLOW; ctb->broken = true; + CT_DEAD(ct, H2G_HAS_ROOM); return false; } @@ -908,6 +943,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", desc->head, desc->tail, desc->status); ctb->broken = true; + CT_DEAD(ct, READ); return -EPIPE; } @@ -1057,6 +1093,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) if (unlikely(err)) { CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", ERR_PTR(err), 4 * request->size, request->msg); + CT_DEAD(ct, PROCESS_FAILED); ct_free_msg(request); } @@ -1233,3 +1270,19 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, drm_printf(p, "Tail: %u\n", ct->ctbs.recv.desc->tail); } + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +static void 
ct_dead_ct_worker_func(struct work_struct *w) +{ + struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); + struct intel_guc *guc = ct_to_guc(ct); + + if (ct->dead_ct_reported) + return; + + ct->dead_ct_reported = true; + + guc_info(guc, "CTB is dead -
Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Dump error capture to kernel log
On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote: From: John Harrison This is useful for getting debug information out in certain situations, such as failing kernel selftests and CI runs that don't log error captures. It is especially useful for things like retrieving GuC logs as GuC operation can't be tracked by adding printk or ftrace entries. v2: Add CONFIG_DRM_I915_DEBUG_GEM wrapper (review feedback by Rodrigo). Do the CI sparse warnings hold water? With that looked at, LGTM, Reviewed-by: Vinay Belgaumkar Signed-off-by: John Harrison --- drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++ drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++ 2 files changed, 142 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f020c0086fbcd..03d62c250c465 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err) i915->gpu_error.first_error = ERR_PTR(err); spin_unlock_irq(>gpu_error.lock); } + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_klog_error_capture(struct intel_gt *gt, + intel_engine_mask_t engine_mask) +{ + static int g_count; + struct drm_i915_private *i915 = gt->i915; + struct i915_gpu_coredump *error; + intel_wakeref_t wakeref; + size_t buf_size = PAGE_SIZE * 128; + size_t pos_err; + char *buf, *ptr, *next; + int l_count = g_count++; + int line = 0; + + /* Can't allocate memory during a reset */ + if (test_bit(I915_RESET_BACKOFF, >reset.flags)) { + drm_err(>i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n", + l_count, line++); + return; + } + + error = READ_ONCE(i915->gpu_error.first_error); + if (error) { + drm_err(>drm, "[Capture/%d.%d] Clearing existing error capture first...\n", + l_count, line++); + i915_reset_error_state(i915); + } + + with_intel_runtime_pm(>runtime_pm, wakeref) + error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE); 
+ + if (IS_ERR(error)) { + drm_err(>drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n", + l_count, line++, PTR_ERR(error)); + return; + } + + buf = kvmalloc(buf_size, GFP_KERNEL); + if (!buf) { + drm_err(>drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n", + l_count, line++); + i915_gpu_coredump_put(error); + return; + } + + drm_info(>drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n", +l_count, line++, __builtin_return_address(0)); + + /* Largest string length safe to print via dmesg */ +# define MAX_CHUNK800 + + pos_err = 0; + while (1) { + ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1); + + if (got <= 0) + break; + + buf[got] = 0; + pos_err += got; + + ptr = buf; + while (got > 0) { + size_t count; + char tag[2]; + + next = strnchr(ptr, got, '\n'); + if (next) { + count = next - ptr; + *next = 0; + tag[0] = '>'; + tag[1] = '<'; + } else { + count = got; + tag[0] = '}'; + tag[1] = '{'; + } + + if (count > MAX_CHUNK) { + size_t pos; + char *ptr2 = ptr; + + for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) { + char chr = ptr[pos]; + + ptr[pos] = 0; + drm_info(>drm, "[Capture/%d.%d] }%s{\n", +l_count, line++, ptr2); + ptr[pos] = chr; + ptr2 = ptr + pos; + + /* +* If spewing large amounts of data via a serial console, +* this can be a very slow process. So be friendly and try +* not to cause 'softlockup on CPU' problems. +*/ + cond_resched();
Re: [Intel-gfx] [PATCH v2 0/2] Add support for dumping error captures via kernel logging
On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote: From: John Harrison Sometimes, the only effective way to debug an issue is to dump all the interesting information at the point of failure. So add support for doing that. v2: Extra CONFIG wrapping (review feedback from Rodrigo) Signed-off-by: John Harrison series LGTM, Reviewed-by: Vinay Belgaumkar John Harrison (2): drm/i915: Dump error capture to kernel log drm/i915/guc: Dump error capture to dmesg on CTB error drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 53 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 + drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++ drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++ 4 files changed, 201 insertions(+)
Re: [PATCH] drm/i915/guc/slpc: Disable rps_boost debugfs
On 5/12/2023 5:39 PM, Dixit, Ashutosh wrote: On Fri, 12 May 2023 16:56:03 -0700, Vinay Belgaumkar wrote: Hi Vinay, rps_boost debugfs shows host turbo related info. This is not valid when SLPC is enabled. A couple of thoughts about this. It appears people know only about rps_boost_info and don't know about guc_slpc_info? So: a. Instead of hiding the rps_boost_info file do we need to print there saying "SLPC is enabled, go look at guc_slpc_info"? rps_boost_info has an eval() function which disables the interface when RPS is OFF. This is indeed the case here, so shouldn't we just follow that instead of trying to link the two? b. Or, even just call guc_slpc_info_show from rps_boost_show (so the two files will show the same SLPC information)? slpc_info has a lot of other info like the SLPC state, not sure that matches up with the rps_boost_info name. Thanks, Vinay. Ashutosh guc_slpc_info already shows the number of boosts. Add num_waiters there as well and disable rps_boost when SLPC is enabled. Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7632 Signed-off-by: Vinay Belgaumkar
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Provide sysfs for efficient freq
On 4/17/2023 6:39 PM, Andi Shyti wrote: Hi Vinay, Looks good, just few minor comments below, [...] @@ -267,13 +267,11 @@ static int run_test(struct intel_gt *gt, int test_type) } /* -* Set min frequency to RPn so that we can test the whole -* range of RPn-RP0. This also turns off efficient freq -* usage and makes results more predictable. +* Turn off efficient freq so RPn/RP0 ranges are obeyed */ - err = slpc_set_min_freq(slpc, slpc->min_freq); + err = intel_guc_slpc_set_ignore_eff_freq(slpc, true); if (err) { - pr_err("Unable to update min freq!"); + pr_err("Unable to turn off efficient freq!"); drm_err()? or gt_err()? As we are here we can use a proper printing. How is this change related to the scope of this patch? The selftest was relying on setting min freq < RP1 to disable efficient freq, now that we have an interface, the test should use that (former method will not work). Should this be a separate patch? return err; } @@ -358,9 +356,10 @@ static int run_test(struct intel_gt *gt, int test_type) break; } - /* Restore min/max frequencies */ - slpc_set_max_freq(slpc, slpc_max_freq); + /* Restore min/max frequencies and efficient flag */ slpc_set_min_freq(slpc, slpc_min_freq); + slpc_set_max_freq(slpc, slpc_max_freq); + intel_guc_slpc_set_ignore_eff_freq(slpc, false); mmhhh... do we care here about the return value? I guess we should, will add. 
if (igt_flush_test(gt->i915)) err = -EIO; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 026d73855f36..b1b70ee3001b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -277,6 +277,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->ignore_eff_freq = false; slpc->min_is_rpmax = false; slpc->boost_freq = 0; @@ -457,6 +458,31 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; no need to initialize ret here. ok. + + mutex_lock(>lock); + wakeref = intel_runtime_pm_get(>runtime_pm); + + ret = slpc_set_param(slpc, +SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, +val); + if (ret) { + guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n", + val, ERR_PTR(ret)); + goto out; + } + + slpc->ignore_eff_freq = val; nit that you can ignore: if you put this under else and save brackets and a goto. ok. Thanks, Vinay. Andi
Re: [Intel-gfx] [PATCH v3] drm/i915/guc/slpc: Provide sysfs for efficient freq
On 4/14/2023 4:49 PM, Dixit, Ashutosh wrote: On Fri, 14 Apr 2023 15:34:15 -0700, Vinay Belgaumkar wrote: @@ -457,6 +458,34 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + /* Need a lock now since waitboost can be modifying min as well */ Delete comment. ok. + mutex_lock(>lock); Actually, don't need the lock itself now so delete the lock. Or, maybe the lock prevents the race if userspace writes to the sysfs when GuC reset is going on so let's retain the lock. But the comment is wrong. yup, ok. + wakeref = intel_runtime_pm_get(>runtime_pm); + + /* Ignore efficient freq if lower min freq is requested */ Delete comment, it's wrong. ok. + ret = slpc_set_param(slpc, +SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, +val); + if (ret) { + guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n", + val, ERR_PTR(ret)); + goto out; + } + + slpc->ignore_eff_freq = val; + This extra line can also be deleted. ok. +out: + intel_runtime_pm_put(>runtime_pm, wakeref); + mutex_unlock(>lock); + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. @@ -482,16 +511,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) mutex_lock(>lock); wakeref = intel_runtime_pm_get(>runtime_pm); - /* Ignore efficient freq if lower min freq is requested */ - ret = slpc_set_param(slpc, -SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, -val < slpc->rp1_freq); - if (ret) { - guc_probe_error(slpc_to_guc(slpc), "Failed to toggle efficient freq: %pe\n", - ERR_PTR(ret)); - goto out; - } - Great, thanks! After taking care of the above, and seems there are also a couple of checkpatch errors, this is: Reviewed-by: Ashutosh Dixit Thanks, Vinay.
Re: [PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On 3/24/2023 4:31 PM, Dixit, Ashutosh wrote: On Fri, 24 Mar 2023 11:15:02 -0700, Belgaumkar, Vinay wrote: Hi Vinay, Thanks for the review. Comments inline below. Sorry about asking the same questions all over again :) Didn't look at previous versions. On 3/15/2023 8:59 PM, Ashutosh Dixit wrote: On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v3 label missing in subject. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++ drivers/gpu/drm/i915/i915_hwmon.c | 39 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..aa8e35a5636a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; Init to 'false' here See next comment. 
GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; +/* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } +i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); +i915_hwmon_power_max_restore(gt->i915, pl1en); if (pl1en) i915_hwmon_power_max_enable(). IMO it's better not to have checks in the main __uc_init_hw() function (if we do this we'll need to add 2 checks in __uc_init_hw()). If you really want we could do something like this inside i915_hwmon_power_max_disable/i915_hwmon_power_max_restore. But for now I am not making any changes. ok. (I can send a patch with the changes if you want to take a look but IMO it will add more logic/code but without real benefits (it will save a rmw if the limit was already disabled, but IMO this code is called so infrequently (only during GuC resets) as to not have any significant impact)). + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..769b5bda4d53f 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) Shouldn't we call this i915_hwmon_package_pl1_disable()? 
I did think of using "pl1" in the function name but then decided to retain "power_max" because other hwmon functions for PL1 limit also use "power_max" (hwm_power_max_read/hwm_power_max_write) and currently "hwmon_power_max" is mapped to the PL1 limit. So "power_max" is used to show that all these functions deal with the PL1 power limit. There is a comment in __uc_init_hw() explaining "power_max" means the PL1 power limit. ok. + __acquires(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + /* Take mu
Re: [PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On 3/15/2023 8:59 PM, Ashutosh Dixit wrote: On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v3 label missing in subject. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++ drivers/gpu/drm/i915/i915_hwmon.c | 39 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..aa8e35a5636a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; Init to 'false' here GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) 
intel_rps_lower_unslice(_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); if (pl1en) i915_hwmon_power_max_enable(). + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..769b5bda4d53f 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) Shouldn't we call this i915_hwmon_package_pl1_disable()? + __acquires(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + /* Take mutex to prevent concurrent hwm_power_max_write */ + mutex_lock(>hwmon_lock); + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + r = intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); Most of this code (lock and rmw parts) is already inside static void hwm_locked_with_pm_intel_uncore_rmw() , can we reuse that here? + + *old = !!(r & PKG_PWR_LIM_1_EN); +} + +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) + __releases(i915->hwmon->hwmon_lock) We can just call this i915_hwmon_power_max_enable() and call whenever the old value was actually enabled. That way, we have proper mirror functions. 
+{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, +old ? PKG_PWR_LIM_1_EN : 0); 3rd param should be 0 here, else we will end up clearing other bits. Thanks, Vinay. + + mutex_unlock(>hwmon_lock); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git
Re: [PATCH 3/3] drm/i915/pmu: Use common freq functions with sysfs
On 3/7/2023 9:33 PM, Ashutosh Dixit wrote: Using common freq functions with sysfs in PMU (but without taking forcewake) solves the following issues (a) missing support for MTL (b) For the requested_freq, we read it only if actual_freq is zero below (meaning, GT is in C6). So then what is the point of reading it without a force wake? It will also be zero, correct? Thanks, Vinay. missing support for older generation (prior to Gen6) (c) missing support for slpc when freq sampling has to fall back to requested freq. It also makes the PMU code future proof where sometimes code has been updated for sysfs and PMU has been missed. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 10 -- drivers/gpu/drm/i915/gt/intel_rps.h | 1 - drivers/gpu/drm/i915/i915_pmu.c | 10 -- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 49df31927c0e..b03bfbe7ee23 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? 
GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index a990f985ab23..60ae27679011 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -53,7 +53,6 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index a76c5ce9513d..1a4c9fed257c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -392,14 +392,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_rps_read_rpstat_fw(rps); - if (val) - val = intel_rps_get_cagf(rps, val); - else - val = rps->cur_freq; + val = intel_rps_read_actual_frequency_fw(rps); + if (!val) + val = intel_rps_get_requested_frequency_fw(rps), add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), period_ns / 1000); + val, period_ns / 1000); } if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
Re: [Intel-gfx] [PATCH] drm/i915/gsc: Fix the Driver-FLR completion
On 2/22/2023 1:01 PM, Alan Previn wrote: The Driver-FLR flow may inadvertently exit early before the full completion of the re-init of the internal HW state if we only poll GU_DEBUG Bit31 (polling for it to toggle from 0 -> 1). Instead we need a two-step completion wait-for-completion flow that also involves GU_CNTL. See the patch and new code comments for detail. This is new direction from HW architecture folks. v2: - Add error message for the teardown timeout (Anshuman) - Don't duplicate code in comments (Jani) LGTM, Tested-by: Vinay Belgaumkar Signed-off-by: Alan Previn Fixes: 5a44fcd73498 ("drm/i915/gsc: Do a driver-FLR on unload if GSC was loaded") --- drivers/gpu/drm/i915/intel_uncore.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f018da7ebaac..f3c46352db89 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2749,14 +2749,25 @@ static void driver_initiated_flr(struct intel_uncore *uncore) /* Trigger the actual Driver-FLR */ intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR); + /* Wait for hardware teardown to complete */ + ret = intel_wait_for_register_fw(uncore, GU_CNTL, +DRIVERFLR_STATUS, 0, +flr_timeout_ms); + if (ret) { + drm_err(>drm, "Driver-FLR-teardown wait completion failed! %d\n", ret); + return; + } + + /* Wait for hardware/firmware re-init to complete */ ret = intel_wait_for_register_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, flr_timeout_ms); if (ret) { - drm_err(>drm, "wait for Driver-FLR completion failed! %d\n", ret); + drm_err(>drm, "Driver-FLR-reinit wait completion failed! %d\n", ret); return; } + /* Clear sticky completion status */ intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS); }
Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0
On 1/16/2023 10:58 AM, Andi Shyti wrote: Hi, On Thu, Jan 12, 2023 at 08:48:11PM -0800, Belgaumkar, Vinay wrote: On 1/12/2023 8:37 PM, Dixit, Ashutosh wrote: On Thu, 12 Jan 2023 20:26:34 -0800, Belgaumkar, Vinay wrote: I think the ABI was changed by the patch mentioned in the commit (a8a4f0467d70). The ABI was originally changed in 80cf8af17af04 and 56a709cf77468. In theory the ABI has never changed, we just needed to agree once and for all what to do when reading the upper level interface. There has never been a previous multitile specification before this change. There have been long and exhaustive discussions on what to do and the decision is that in some cases we show the average, in others the maximum. Never the GT0, though. Yes, you are right. @Andi, did we have a plan to update the IGT tests that use these interfaces to properly refer to the per GT entries as well? They now receive average values instead of absolute, hence will fail on a multi-GT device. I don't know what's the plan for igt's. Which tests are failing? I think we shouldn't be using the upper level interfaces at all in IGT's. Previously there has been an error printed on dmesg when this was happening. The error has been removed in order to set the ABI as agreed above. Tests like perf_pmu and gem_ctx_freq will fail as they read upper level sysfs entries and expect them to change as per the test. I think this includes all of the tests that read RC6 or Turbo related sysfs entries for that matter. Thanks, Vinay. Andi
Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0
On 1/12/2023 8:37 PM, Dixit, Ashutosh wrote: On Thu, 12 Jan 2023 20:26:34 -0800, Belgaumkar, Vinay wrote: I think the ABI was changed by the patch mentioned in the commit (a8a4f0467d70). The ABI was originally changed in 80cf8af17af04 and 56a709cf77468. Yes, you are right. @Andi, did we have a plan to update the IGT tests that use these interfaces to properly refer to the per GT entries as well? They now receive average values instead of absolute, hence will fail on a multi-GT device. Thanks, Vinay.
Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0
On 1/12/2023 7:15 PM, Dixit, Ashutosh wrote: On Thu, 12 Jan 2023 18:27:52 -0800, Vinay Belgaumkar wrote: Reading current root sysfs entries gives a min/max of all GTs. Updating this so we return default (GT0) values when root level sysfs entries are accessed, instead of min/max for the card. Tests that are not multi GT capable will read incorrect sysfs values without this change on multi-GT platforms like MTL. Fixes: a8a4f0467d70 ("drm/i915: Fix CFI violations in gt_sysfs") We seem to be proposing to change the previous sysfs ABI with this patch? But even then it doesn't seem correct to use gt0 values for device level sysfs. Actually I received the following comment about using max freq across gt's for device level freq's (gt_act_freq_mhz etc.) from one of our users: I think the ABI was changed by the patch mentioned in the commit (a8a4f0467d70). If I am not mistaken, original behavior was to return the GT0 values (I will double check this). IMO, if that patch changed the behavior, it should have been accompanied with patches that update all the tests to use the proper per GT sysfs as well. Thanks, Vinay. - On Sun, 06 Nov 2022 08:54:04 -0800, Lawson, Lowren H wrote: Why show maximum? Wouldn’t average be more accurate to the user experience? As a user, I expect the ‘card’ frequency to be relatively accurate to the entire card. If I see 1.6GHz, but the card is behaving as if it’s running a 1.0 & 1.6GHz on the different compute tiles, I’m going to see a massive decrease in compute workload performance while at ‘maximum’ frequency. - So I am not sure why max/min were previously chosen. Why not the average? Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH v3 1/1] drm/i915/pxp: Use drm_dbg if arb session failed due to fw version
On 12/21/2022 9:49 AM, Alan Previn wrote: If PXP arb-session is being attempted on older hardware SKUs or on hardware with older, unsupported, firmware versions, then don't report the failure with a drm_error. Instead, look specifically for the API-version error reply and drm_dbg that reply. In this case, the user-space will eventually get a -ENODEV for the protected context creation which is the correct behavior and we don't create unnecessary drm_error's in our dmesg (for what is unsupported platforms). LGTM. Is there a link to where these pxp status codes are documented? Reviewed-by: Vinay Belgaumkar Changes from prio revs: v2 : - remove unnecessary newline. (Jani) v1 : - print incorrect version from input packet, not output. Signed-off-by: Alan Previn --- drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h | 1 + drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 4 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h index c2f23394f9b8..aaa8187a0afb 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h @@ -17,6 +17,7 @@ */ enum pxp_status { PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_ERROR_API_VERSION = 0x1002, PXP_STATUS_OP_NOT_PERMITTED = 0x4013 }; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index d50354bfb993..73aa8015f828 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -298,6 +298,10 @@ int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, if (ret) drm_err(>drm, "Failed to send tee msg ret=[%d]\n", ret); + else if (msg_out.header.status == PXP_STATUS_ERROR_API_VERSION) + drm_dbg(>drm, "PXP firmware version unsupported, requested: " + "CMD-ID-[0x%08x] on API-Ver-[0x%08x]\n", + msg_in.header.command_id, msg_in.header.api_version); else if (msg_out.header.status != 0x0) 
drm_warn(>drm, "PXP firmware failed arb session init request ret=[0x%08x]\n", msg_out.header.status); base-commit: cc44a1e87ea6b788868878295119398966f98a81
Re: [PATCH 1/1] drm/i915/mtl: Enable Idle Messaging for GSC CS
On 11/15/2022 5:44 AM, Badal Nilawar wrote: From: Vinay Belgaumkar By defaut idle mesaging is disabled for GSC CS so to unblock RC6 entry on media tile idle messaging need to be enabled. v2: - Fix review comments (Vinay) - Set GSC idle hysterisis to 5 us (Badal) Bspec: 71496 Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 18 ++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee..5522885b2db0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -15,6 +15,22 @@ #include "intel_rc6.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_gt_regs.h" + +static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_METEORLAKE(i915) && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); + /* 5 us hysterisis */ + intel_uncore_write(engine->gt->uncore, + PWRCTX_MAXCNT_GSCCS, + 0xA); + } +} static void dbg_poison_ce(struct intel_context *ce) { @@ -275,6 +291,8 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_wakeref_init(>wakeref, rpm, _ops); intel_engine_init_heartbeat(engine); + + intel_gsc_idle_msg_enable(engine); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 07031e03f80c..20472eb15364 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -913,6 +913,10 @@ #define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) #define MSG_IDLE_FW_SHIFT9 +#define RC_PSMI_CTRL_GSCCS _MMIO(0x11a050) Alignment still seems off? 
Other than that, Reviewed-by: Vinay Belgaumkar +#define IDLE_MSG_DISABLE BIT(0) +#define PWRCTX_MAXCNT_GSCCS_MMIO(0x11a054) + #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
Re: [PATCH 2/2] drm/i915/mtl: Enable Idle Messaging for GSC CS
On 10/31/2022 8:36 PM, Badal Nilawar wrote: From: Vinay Belgaumkar By defaut idle mesaging is disabled for GSC CS so to unblock RC6 entry on media tile idle messaging need to be enabled. C6 entry instead of RC6. Also *needs*. Bspec: 71496 Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 12 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee..8d391f8fd861 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -15,6 +15,7 @@ #include "intel_rc6.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_gt_regs.h" static void dbg_poison_ce(struct intel_context *ce) { @@ -271,10 +272,21 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(>wakeref, rpm, _ops); intel_engine_init_heartbeat(engine); + + if (IS_METEORLAKE(i915) && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); + drm_dbg(>drm, + "Set GSC CS Idle Reg to: 0x%x", + intel_uncore_read(engine->gt->uncore, RC_PSMI_CTRL_GSCCS)); Do we need the debug print here? + } + } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f4624262dc81..176902a9f2a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -908,6 +908,9 @@ #define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) #define MSG_IDLE_FW_SHIFT9 +#define RC_PSMI_CTRL_GSCCS _MMIO(0x11a050) +#define IDLE_MSG_DISABLE BIT(0) Is the alignment off? Thanks, Vinay. 
+ #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Use platform limits for min/max frequency
On 10/21/2022 10:26 PM, Dixit, Ashutosh wrote: On Fri, 21 Oct 2022 18:38:57 -0700, Belgaumkar, Vinay wrote: On 10/20/2022 3:57 PM, Dixit, Ashutosh wrote: On Tue, 18 Oct 2022 11:30:31 -0700, Vinay Belgaumkar wrote: Hi Vinay, diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4c6e9257e593..e42bc215e54d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type) enum intel_engine_id id; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; + u32 saved_min_freq; int err = 0; if (!intel_uc_uses_guc_slpc(>uc)) @@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } - /* -* FIXME: With efficient frequency enabled, GuC can request -* frequencies higher than the SLPC max. While this is fixed -* in GuC, we level set these tests with RPn as min. -*/ - err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) - return err; + if (slpc_min_freq == slpc_max_freq) { + /* Server parts will have min/max clamped to RP0 */ + if (slpc->min_is_rpmax) { + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) { + pr_err("Unable to update min freq on server part"); + return err; + } - if (slpc->min_freq == slpc->rp0_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + } else { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; Sorry but I am not following this else case here. Why are we saying min/max are fused to the same value? In this case we can't do "slpc_set_min_freq(slpc, slpc->min_freq)" ? That is, we can't change SLPC min freq? This would be an error case due to a faulty part. We may come across a part where min/max is fused to the same value. 
But even then the original check is much clearer since it is actually comparing the fused freq's: if (slpc->min_freq == slpc->rp0_freq) Because if min/max have been changed slpc_min_freq and slpc_max_freq are no longer fused freq. And also this check should be right at the top of run_test, right after if (!intel_uc_uses_guc_slpc), rather than in the middle here (otherwise because we are basically not doing any error rewinding so causing memory leaks if any of the functions return error). ok. + } + } else { + /* +* FIXME: With efficient frequency enabled, GuC can request +* frequencies higher than the SLPC max. While this is fixed +* in GuC, we level set these tests with RPn as min. +*/ + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) + return err; } So let's do what is suggested above and then see what remains here and if we need all these code changes. Most likely we can just do unconditionally what we were doing before, i.e.: err = slpc_set_min_freq(slpc, slpc->min_freq); if (err) return err; + saved_min_freq = slpc_min_freq; + + /* New temp min freq = RPn */ + slpc_min_freq = slpc->min_freq; Why do we need saved_min_freq? We can retain slpc_min_freq and in the check below: if (max_act_freq <= slpc_min_freq) We can just change the check to: if (max_act_freq <= slpc->min_freq) Looks like to have been a bug in the original code? Not a bug, it wasn't needed until we didn't have server parts (slpc_min_freq would typically be slpc->min_freq on non-server parts). 
+ intel_gt_pm_wait_for_idle(gt); intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { @@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int test_type) /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); + slpc_set_min_freq(slpc, saved_min_freq); if (igt_flush_test(gt->i915)) err = -EIO; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..b7cdeec44bd3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(>num_wait
Re: [Intel-gfx] [PATCH v4] drm/i915/slpc: Optmize waitboost for SLPC
On 10/22/2022 12:22 PM, Dixit, Ashutosh wrote: On Sat, 22 Oct 2022 10:56:03 -0700, Belgaumkar, Vinay wrote: Hi Vinay, diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..32e1f5dde5bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,9 +1016,15 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + if (slpc->min_freq_softlimit == slpc->boost_freq) + return; nit but is it possible that 'slpc->min_freq_softlimit > slpc->boost_freq' (looks possible to me from the code though we might not have intended it)? Then we can change this to: if (slpc->min_freq_softlimit >= slpc->boost_freq) return; Any comment about this? It looks clearly possible to me from the code. So with the above change this is: Reviewed-by: Ashutosh Dixit Agree. Thanks, Vinay.
Re: [Intel-gfx] [PATCH v4] drm/i915/slpc: Optmize waitboost for SLPC
On 10/21/2022 7:11 PM, Dixit, Ashutosh wrote: On Fri, 21 Oct 2022 17:24:52 -0700, Vinay Belgaumkar wrote: Hi Vinay, Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if boost_freq and the min softlimit are the same. v2: Add the tracing back, and check requested freq in the worker thread (Tvrtko) v3: Check requested freq in dec_waiters as well v4: Only check min_softlimit against boost_freq. Limit this optimization for server parts for now. Sorry I didn't follow. Why are we saying limit this only to server? This: if (slpc->min_freq_softlimit == slpc->boost_freq) return; The condition above should work for client too if it is true? But yes it is typically true automatically for server but not for client. Is that what you mean? yes. For client, min_freq_softlimit would typically be RPn. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..32e1f5dde5bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,9 +1016,15 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + if (slpc->min_freq_softlimit == slpc->boost_freq) + return; nit but is it possible that 'slpc->min_freq_softlimit > slpc->boost_freq' (looks possible to me from the code though we might not have intended it)? 
Then we can change this to: if (slpc->min_freq_softlimit >= slpc->boost_freq) return; + /* Return if old value is non zero */ - if (!atomic_fetch_inc(>num_waiters)) + if (!atomic_fetch_inc(>num_waiters)) { + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", +rq->fence.context, rq->fence.seqno); Another possibility would have been to add the trace to slpc_boost_work but this is matches host turbo so I think it is fine here. schedule_work(>boost_work); + } return; } Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Use platform limits for min/max frequency
On 10/20/2022 3:57 PM, Dixit, Ashutosh wrote: On Tue, 18 Oct 2022 11:30:31 -0700, Vinay Belgaumkar wrote: Hi Vinay, diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4c6e9257e593..e42bc215e54d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type) enum intel_engine_id id; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; + u32 saved_min_freq; int err = 0; if (!intel_uc_uses_guc_slpc(>uc)) @@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } - /* -* FIXME: With efficient frequency enabled, GuC can request -* frequencies higher than the SLPC max. While this is fixed -* in GuC, we level set these tests with RPn as min. -*/ - err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) - return err; + if (slpc_min_freq == slpc_max_freq) { + /* Server parts will have min/max clamped to RP0 */ + if (slpc->min_is_rpmax) { + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) { + pr_err("Unable to update min freq on server part"); + return err; + } - if (slpc->min_freq == slpc->rp0_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + } else { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; Sorry but I am not following this else case here. Why are we saying min/max are fused to the same value? In this case we can't do "slpc_set_min_freq(slpc, slpc->min_freq)" ? That is, we can't change SLPC min freq? This would be an error case due to a faulty part. We may come across a part where min/max is fused to the same value. 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..b7cdeec44bd3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(>num_waiters, 0); @@ -588,6 +589,32 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + int slpc_min_freq; + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) + return false; I am wondering what happens if the above fails on server? Should we return true or false on server and what are the consequences of returning false on server? Any case I think we should at least put a drm_err or something here just in case this ever fails so we'll know something weird happened. Makes sense. Thanks, Vinay. + + if (slpc_min_freq == SLPC_MAX_FREQ_MHZ) + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For server parts, SLPC min will be at RPMax. +* Use min softlimit to clamp it to RP0 instead. 
+*/ + if (is_slpc_min_freq_rpmax(slpc) && + !slpc->min_freq_softlimit) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; + (slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit; + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -647,6 +674,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 82a98f78f96c..11975a31c9d0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,8 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +#define SLPC_MAX_FREQ_MHZ 4250 This seems to be really a value (255 converted to freq) so seems ok to intepret in MHz. Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optmize waitboost for SLPC
On 10/21/2022 11:40 AM, Dixit, Ashutosh wrote: On Fri, 21 Oct 2022 11:24:42 -0700, Belgaumkar, Vinay wrote: On 10/20/2022 4:36 PM, Dixit, Ashutosh wrote: On Thu, 20 Oct 2022 13:16:00 -0700, Belgaumkar, Vinay wrote: On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote: On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote: Hi Vinay, Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. But how are we sure that the freq will remain at RP0 in the future (when the waiting request or any requests which are ahead execute)? In the current waitboost implementation, set_param is sent to GuC ahead of the waiting request to ensure that the freq would be max when this waiting request executed on the GPU and the freq is kept at max till this request retires (considering just one waiting request). How can we ensure this if we don't send the waitboost set_param to GuC? There is no way to guarantee the frequency will remain at RP0 till the request retires. As a theoretical example, lets say the request boosted freq to RP0, but a user changed min freq using sysfs immediately after. That would be a bug. If waitboost is in progress and in the middle user changed min freq, I would expect the freq to revert to the new min only after the waitboost phase was over. The problem here is that GuC is unaware of this "boosting" phenomenon. Setting the min_freq_softlimit as well to boost when we send a boost request might help with this issue. In any case, I am not referring to this case. Since FW controls the freq there is nothing preventing FW to change the freq unless we raise min to max which is what waitboost does. Ok, so maybe the solution here is to check if min_softlimit is already at boost freq, as it tracks the min freq changes. That should take care of server parts automatically as well. 
Correct, yes that would be the right way to do it. Actually, rethinking, it's not going to work for client GPUs. We cannot clobber the min_softlimit as the user may have set it. So, I'll just make this change to optimize it for server parts for now. Thanks, Vinay. Thanks. -- Ashutosh Waitboost is done by a pending request to "hurry" the current requests. If GT is already at boost frequency, that purpose is served. FW can bring the freq down later before the waiting request is scheduled. Also, host algorithm already has this optimization as well. Host turbo is different from SLPC. Host turbo controls the freq algorithm so it knows freq will not come down till it itself brings the freq down. Unlike SLPC where FW is controlling the freq. Therefore host turbo doesn't ever need to do an MMIO read but only needs to refer to its own state (rps->cur_freq etc.). True. Host algorithm has a periodic timer where it updates frequency. Here, it checks num_waiters and sets client_boost every time that is non-zero. I had assumed we'll do this optimization for server parts where min is already RP0 in which case we can completely disable waitboost. But this patch is something else. Hopefully the softlimit changes above will help with client and server. Thanks, Vinay. Thanks. 
-- Ashutosh v2: Add the tracing back, and check requested freq in the worker thread (Tvrtko) v3: Check requested freq in dec_waiters as well Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..18b75cf08d1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", +rq->fence.context, rq->fence.seqno); + /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) schedule_work(>boost_work); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b7cdeec44bd3..9dbdbab1515a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + struct intel_rps *rps = _to_gt(slpc)->rps; int err; /*
Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optmize waitboost for SLPC
On 10/20/2022 4:36 PM, Dixit, Ashutosh wrote: On Thu, 20 Oct 2022 13:16:00 -0700, Belgaumkar, Vinay wrote: On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote: On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote: Hi Vinay, Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. But how are we sure that the freq will remain at RP0 in the future (when the waiting request or any requests which are ahead execute)? In the current waitboost implementation, set_param is sent to GuC ahead of the waiting request to ensure that the freq would be max when this waiting request executed on the GPU and the freq is kept at max till this request retires (considering just one waiting request). How can we ensure this if we don't send the waitboost set_param to GuC? There is no way to guarantee the frequency will remain at RP0 till the request retires. As a theoretical example, lets say the request boosted freq to RP0, but a user changed min freq using sysfs immediately after. That would be a bug. If waitboost is in progress and in the middle user changed min freq, I would expect the freq to revert to the new min only after the waitboost phase was over. The problem here is that GuC is unaware of this "boosting" phenomenon. Setting the min_freq_softlimit as well to boost when we send a boost request might help with this issue. In any case, I am not referring to this case. Since FW controls the freq there is nothing preventing FW to change the freq unless we raise min to max which is what waitboost does. Ok, so maybe the solution here is to check if min_softlimit is already at boost freq, as it tracks the min freq changes. That should take care of server parts automatically as well. Waitboost is done by a pending request to "hurry" the current requests. If GT is already at boost frequency, that purpose is served. 
FW can bring the freq down later before the waiting request is scheduled. Also, host algorithm already has this optimization as well. Host turbo is different from SLPC. Host turbo controls the freq algorithm so it knows freq will not come down till it itself brings the freq down. Unlike SLPC where FW is controling the freq. Therefore host turbo doesn't ever need to do a MMIO read but only needs to refer to its own state (rps->cur_freq etc.). True. Host algorithm has a periodic timer where it updates frequency. Here, it checks num_waiters and sets client_boost every time that is non-zero. I had assumed we'll do this optimization for server parts where min is already RP0 in which case we can completely disable waitboost. But this patch is something else. Hopefully the softlimit changes above will help with client and server. Thanks, Vinay. Thanks. -- Ashutosh v2: Add the tracing back, and check requested freq in the worker thread (Tvrtko) v3: Check requested freq in dec_waiters as well Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..18b75cf08d1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", +rq->fence.context, rq->fence.seqno); + /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) schedule_work(>boost_work); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b7cdeec44bd3..9dbdbab1515a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -227,14 +227,19 @@ static int 
slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + struct intel_rps *rps = _to_gt(slpc)->rps; int err; /* * Raise min freq to boost. It's possible that * this is greater than current max. But it will * certainly be limited by RP0. An error setting -* the min param is not fatal. +* the min param is not fatal. No need to boost +* if we are already requesting it. */ + if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq) + return; +
Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optimize waitboost for SLPC
On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote: On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote: Hi Vinay, Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. But how are we sure that the freq will remain at RP0 in the future (when the waiting request or any requests which are ahead execute)? In the current waitboost implementation, set_param is sent to GuC ahead of the waiting request to ensure that the freq would be max when this waiting request executed on the GPU and the freq is kept at max till this request retires (considering just one waiting request). How can we ensure this if we don't send the waitboost set_param to GuC? There is no way to guarantee the frequency will remain at RP0 till the request retires. As a theoretical example, lets say the request boosted freq to RP0, but a user changed min freq using sysfs immediately after. Waitboost is done by a pending request to "hurry" the current requests. If GT is already at boost frequency, that purpose is served. Also, host algorithm already has this optimization as well. Thanks, Vinay. I had assumed we'll do this optimization for server parts where min is already RP0 in which case we can completely disable waitboost. But this patch is something else. Thanks. 
-- Ashutosh v2: Add the tracing back, and check requested freq in the worker thread (Tvrtko) v3: Check requested freq in dec_waiters as well Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..18b75cf08d1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", +rq->fence.context, rq->fence.seqno); + /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) schedule_work(>boost_work); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b7cdeec44bd3..9dbdbab1515a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + struct intel_rps *rps = _to_gt(slpc)->rps; int err; /* * Raise min freq to boost. It's possible that * this is greater than current max. But it will * certainly be limited by RP0. An error setting -* the min param is not fatal. +* the min param is not fatal. No need to boost +* if we are already requesting it. 
*/ + if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq) + return; + mutex_lock(>lock); if (atomic_read(>num_waiters)) { err = slpc_force_min_freq(slpc, slpc->boost_freq); @@ -728,6 +733,7 @@ int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val) void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) { + struct intel_rps *rps = _to_gt(slpc)->rps; /* * Return min back to the softlimit. * This is called during request retire, @@ -735,8 +741,10 @@ void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) * set_param fails. */ mutex_lock(>lock); - if (atomic_dec_and_test(>num_waiters)) - slpc_force_min_freq(slpc, slpc->min_freq_softlimit); + if (atomic_dec_and_test(>num_waiters)) { + if (intel_rps_get_requested_frequency(rps) != slpc->min_freq_softlimit) + slpc_force_min_freq(slpc, slpc->min_freq_softlimit); + } mutex_unlock(>lock); } -- 2.35.1
Re: [PATCH v2] drm/i915/slpc: Optimize waitboost for SLPC
On 10/19/2022 4:05 PM, Vinay Belgaumkar wrote: Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. v2: Add the tracing back, and check requested freq in the worker thread (Tvrtko) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 7 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..18b75cf08d1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", +rq->fence.context, rq->fence.seqno); + /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) The issue when we move the req freq check into the slpc_work is that we are incrementing num_waiters. That will trigger a de-boost and result in a H2G. Need to check the req frequency there as well. Thanks, Vinay. schedule_work(>boost_work); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b7cdeec44bd3..7ab96221be7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + struct intel_rps *rps = _to_gt(slpc)->rps; int err; /* * Raise min freq to boost. It's possible that * this is greater than current max. But it will * certainly be limited by RP0. An error setting -* the min param is not fatal. 
+* the min param is not fatal. No need to boost +* if we are already requesting it. */ + if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq) + return; + mutex_lock(>lock); if (atomic_read(>num_waiters)) { err = slpc_force_min_freq(slpc, slpc->boost_freq);
Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optimize waitboost for SLPC
On 10/19/2022 2:12 PM, Belgaumkar, Vinay wrote: On 10/19/2022 12:40 AM, Tvrtko Ursulin wrote: On 18/10/2022 23:15, Vinay Belgaumkar wrote: Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..a20ae4fceac8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps *rps) void intel_rps_boost(struct i915_request *rq) { struct intel_guc_slpc *slpc; + struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; + /* If GuC is already requesting RP0, skip */ + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq) One correction here is this should be slpc->boost_freq. + return; + } + Feels a little bit like a layering violation. Wait boost reference counts and request markings will changed based on asynchronous state - a mmio read. Also, a little below we have this: """ /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) { struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) ***>>>> Wouldn't it skip doing anything here already? <<<<*** It will skip only if boost is already happening. This patch is trying to prevent even that first one if possible. 
schedule_work(>boost_work); return; } if (atomic_fetch_inc(>num_waiters)) return; """ But I wonder if this is not a layering violation already. Looks like one for me at the moment. And as it happens there is an ongoing debug of clvk slowness where I was a bit puzzled by the lack of "boost fence" in trace_printk logs - but now I see how that happens. Does not feel right to me that we lose that tracing with SLPC. Agreed. Will add the trace to the SLPC case as well. However, the question is what does that trace indicate? Even in the host case, we log the trace, but may skip the actual boost as the req is already matching boost freq. IMO, we should log the trace only when we actually decide to boost. On second thoughts, that trace only tracks the boost fence, which is set in this case. So, might be ok to have it regardless. We count the num_boosts anyways if we ever wanted to know how many of those actually went on to boost the freq. So in general - why the correct approach wouldn't be to solve this in the worker - which perhaps should fork to slpc specific branch and do the consolidations/skips based on mmio reads in there? sure, I can move the mmio read to the SLPC worker thread. Thanks, Vinay. Regards, Tvrtko /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) { - struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps);
Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optimize waitboost for SLPC
On 10/19/2022 12:40 AM, Tvrtko Ursulin wrote: On 18/10/2022 23:15, Vinay Belgaumkar wrote: Waitboost (when SLPC is enabled) results in a H2G message. This can result in thousands of messages during a stress test and fill up an already full CTB. There is no need to request for RP0 if GuC is already requesting the same. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2..a20ae4fceac8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps *rps) void intel_rps_boost(struct i915_request *rq) { struct intel_guc_slpc *slpc; + struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; + /* If GuC is already requesting RP0, skip */ + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq) One correction here is this should be slpc->boost_freq. + return; + } + Feels a little bit like a layering violation. Wait boost reference counts and request markings will changed based on asynchronous state - a mmio read. Also, a little below we have this: """ /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) { struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); /* Return if old value is non zero */ if (!atomic_fetch_inc(>num_waiters)) *** Wouldn't it skip doing anything here already? *** It will skip only if boost is already happening. This patch is trying to prevent even that first one if possible. schedule_work(>boost_work); return; } if (atomic_fetch_inc(>num_waiters)) return; """ But I wonder if this is not a layering violation already. Looks like one for me at the moment. 
And as it happens there is an ongoing debug of clvk slowness where I was a bit puzzled by the lack of "boost fence" in trace_printk logs - but now I see how that happens. Does not feel right to me that we lose that tracing with SLPC. Agreed. Will add the trace to the SLPC case as well. However, the question is what does that trace indicate? Even in the host case, we log the trace, but may skip the actual boost as the req is already matching boost freq. IMO, we should log the trace only when we actually decide to boost. So in general - why the correct approach wouldn't be to solve this in the worker - which perhaps should fork to slpc specific branch and do the consolidations/skips based on mmio reads in there? sure, I can move the mmio read to the SLPC worker thread. Thanks, Vinay. Regards, Tvrtko /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) { - struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps);
Re: [PATCH v2] drm/i915/slpc: Use platform limits for min/max frequency
On 10/13/2022 3:28 PM, Dixit, Ashutosh wrote: On Thu, 13 Oct 2022 08:55:24 -0700, Vinay Belgaumkar wrote: Hi Vinay, GuC will set the min/max frequencies to theoretical max on ATS-M. This will break kernel ABI, so limit min/max frequency to RP0(platform max) instead. Isn't what we are calling "theoretical max" or "RPmax" really just -1U (0xFFFFFFFF)? Though I have heard this is not a max value but -1U indicates FW default values unmodified by host SW, which would mean frequencies are fully controlled by FW (min == max == -1U). But if this were the case I don't know why this would be the case only for server, why doesn't FW set these for clients too to indicate it is fully in control? FW sets max to -1U for client products(we already pull it down to RP0). It additionally makes min=max for server parts. So the question what does -1U actually represent? Is it the RPmax value or does -1U represent "FW defaults"? Also this concept of using -1U as "FW defaults" is present in Level0/OneAPI (and likely in firmware) but we seem to have blocked in the i915 ABI. I understand we may not be able to make such changes at present but this provides some context for the review comments below. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..11613d373a49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(>num_waiters, 0); @@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + int slpc_min_freq; + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) + return false; + + if (slpc_min_freq > slpc->rp0_freq) or >=. 
If what we are calling "rpmax" really -1U then why don't we just check for -1U here? u32 slpc_min_freq; if (slpc_min_freq == -1U) That'll work similarly too. Only time slpc_min_freq is greater than rp0 is for a server part. + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For server parts, SLPC min will be at RPMax. +* Use min softlimit to clamp it to RP0 instead. +*/ + if (is_slpc_min_freq_rpmax(slpc) && + !slpc->min_freq_softlimit) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; Isn't it safer to use a platform check such as IS_ATSM or IS_XEHPSDV (or even #define IS_SERVER()) to set min freq to RP0 rather than this -1U value from FW? What if -1U means "FW defaults" and FW starts setting this on client products tomorrow? We are not checking for -1 specifically, but only if FW has set min > RP0 as an indicator. Also, might be worth having IS_SERVER at some point if there are other places we need this info as well. Also, we need to set gt->defaults.min_freq here. yes, need to add that. Thanks, Vinay. Thanks. -- Ashutosh + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 73d208123528..a6ef53b04e04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -19,6 +19,9 @@ struct intel_guc_slpc { bool supported; bool selected; + /* Indicates this is a server part */ + bool min_is_rpmax; + /* platform frequency limits */ u32 min_freq; u32 rp0_freq; -- 2.35.1
Re: [PATCH] drm/i915/slpc: Use platform limits for min/max frequency
On 10/13/2022 8:14 AM, Das, Nirmoy wrote: On 10/12/2022 8:26 PM, Vinay Belgaumkar wrote: GuC will set the min/max frequencies to theoretical max on ATS-M. This will break kernel ABI, so limit min/max frequency to RP0(platform max) instead. Also modify the SLPC selftest to update the min frequency when we have a server part so that we can iterate between platform min and max. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 40 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 29 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 3 ++ 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4c6e9257e593..1f84362af737 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type) enum intel_engine_id id; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; + u32 saved_min_freq; int err = 0; if (!intel_uc_uses_guc_slpc(>uc)) @@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } - /* - * FIXME: With efficient frequency enabled, GuC can request - * frequencies higher than the SLPC max. While this is fixed - * in GuC, we level set these tests with RPn as min. - */ - err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) - return err; - if (slpc->min_freq == slpc->rp0_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + /* Servers will have min/max clamped to RP0 */ This should be "server parts". Tested the patch with Riana's suggested changes. Acked-by: Nirmoy Das with above changes. Thanks, v2 sent with corrections. Vinay. 
Nirmoy + if (slpc->min_is_rpmax) { + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) { + pr_err("Unable to update min freq on server part"); + return err; + } + + } else { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + } else { + /* + * FIXME: With efficient frequency enabled, GuC can request + * frequencies higher than the SLPC max. While this is fixed + * in GuC, we level set these tests with RPn as min. + */ + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) + return err; } + saved_min_freq = slpc_min_freq; + + /* New temp min freq = RPn */ + slpc_min_freq = slpc->min_freq; + intel_gt_pm_wait_for_idle(gt); intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { @@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int test_type) /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); + slpc_set_min_freq(slpc, saved_min_freq); if (igt_flush_test(gt->i915)) err = -EIO; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..11613d373a49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(>num_waiters, 0); @@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + int slpc_min_freq; + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) + return false; + + if (slpc_min_freq > slpc->rp0_freq) + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For server parts, SLPC min will be at RPMax. + * Use min softlimit to clamp it to RP0 instead. 
+ */ + if (is_slpc_min_freq_rpmax(slpc) && + !slpc->min_freq_softlimit) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 73d208123528..a6ef53b04e04 100644 ---
Re: [Intel-gfx] [PATCH] drm/i915/slpc: Use platform limits for min/max frequency
On 10/13/2022 4:34 AM, Tauro, Riana wrote: On 10/12/2022 11:56 PM, Vinay Belgaumkar wrote: GuC will set the min/max frequencies to theoretical max on ATS-M. This will break kernel ABI, so limit min/max frequency to RP0(platform max) instead. Also modify the SLPC selftest to update the min frequency when we have a server part so that we can iterate between platform min and max. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 40 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 29 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 3 ++ 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 4c6e9257e593..1f84362af737 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type) enum intel_engine_id id; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; + u32 saved_min_freq; int err = 0; if (!intel_uc_uses_guc_slpc(>uc)) @@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } - /* - * FIXME: With efficient frequency enabled, GuC can request - * frequencies higher than the SLPC max. While this is fixed - * in GuC, we level set these tests with RPn as min. - */ - err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) - return err; - if (slpc->min_freq == slpc->rp0_freq) { This has to be (slpc_min_freq == slpc_max_freq) instead of (slpc->min_freq == slpc->rp0_freq). Servers will have min/max softlimits clamped to RP0 Agree. will send out v2. Thanks, Vinay. 
Thanks Riana - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + /* Servers will have min/max clamped to RP0 */ + if (slpc->min_is_rpmax) { + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) { + pr_err("Unable to update min freq on server part"); + return err; + } + + } else { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + } else { + /* + * FIXME: With efficient frequency enabled, GuC can request + * frequencies higher than the SLPC max. While this is fixed + * in GuC, we level set these tests with RPn as min. + */ + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) + return err; } + saved_min_freq = slpc_min_freq; + + /* New temp min freq = RPn */ + slpc_min_freq = slpc->min_freq; + intel_gt_pm_wait_for_idle(gt); intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { @@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int test_type) /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); + slpc_set_min_freq(slpc, saved_min_freq); if (igt_flush_test(gt->i915)) err = -EIO; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..11613d373a49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(>num_waiters, 0); @@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + int slpc_min_freq; + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) + return false; + + if (slpc_min_freq > slpc->rp0_freq) + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For 
server parts, SLPC min will be at RPMax. + * Use min softlimit to clamp it to RP0 instead. + */ + if (is_slpc_min_freq_rpmax(slpc) && + !slpc->min_freq_softlimit) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index
Re: [PATCH 1/2] drm/i915: Add a wrapper for frequency debugfs
On 10/4/2022 12:36 AM, Jani Nikula wrote: On Mon, 03 Oct 2022, Vinay Belgaumkar wrote: Move it to the RPS source file. The idea was that the 1st patch would be non-functional code movement. This is still a functional change. Or you can do the functional changes first, and then move code, as long as you don't combine code movement with functional changes. Yup, will move the SLPC check to the second patch as well. Please also mark your patch revisions and note the changes. There's no indication this series is v2. ok. Thanks, Vinay. BR, Jani. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 157 +--- drivers/gpu/drm/i915/gt/intel_rps.c | 169 ++ drivers/gpu/drm/i915/gt/intel_rps.h | 3 + 3 files changed, 173 insertions(+), 156 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 9fd4d9255a97..4319d6cdafe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else if (GRAPHICS_VER(i915) >= 6) { - u32 rp_state_limits; - u32 gt_perf_status; - struct intel_rps_freq_caps caps; - u32 rpmodectl, rpinclimit, rpdeclimit; - u32 rpstat, cagf, reqf; - u32 rpcurupei, rpcurup, rpprevup; - u32 rpcurdownei, rpcurdown, rpprevdown; - u32 rpupei, rpupt, rpdownei, rpdownt; - u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - - rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - gen6_rps_get_freq_caps(rps, ); - if (IS_GEN9_LP(i915)) - gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - else - gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - - /* RPSTAT1 is in the GT power well */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); - if 
(GRAPHICS_VER(i915) >= 9) { - reqf >>= 23; - } else { - reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - reqf >>= 24; - else - reqf >>= 25; - } - reqf = intel_gpu_freq(rps, reqf); - - rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; - rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; - rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; - rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; - rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - - rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); - rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - - rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); - rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - cagf = intel_rps_read_actual_frequency(rps); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - if (GRAPHICS_VER(i915) >= 11) { - pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); - /* -* The equivalent to the PM ISR & IIR cannot be read -* without affecting the current state of the system -*/ - pm_isr = 0; - pm_iir = 0; - } else if (GRAPHICS_VER(i915) >= 8) { - pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); - pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); - pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); - pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); -
Re: [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency
On 8/15/2022 10:32 AM, Rodrigo Vivi wrote: On Sun, Aug 14, 2022 at 04:46:54PM -0700, Vinay Belgaumkar wrote: Host Turbo operates at efficient frequency when GT is not idle unless the user or workload has forced it to a higher level. Replicate the same behavior in SLPC by allowing the algorithm to use efficient frequency. We had disabled it during boot due to concerns that it might break kernel ABI for min frequency. However, this is not the case since SLPC will still abide by the (min,max) range limits. With this change, min freq will be at efficient frequency level at init instead of fused min (RPn). If user chooses to reduce min freq below the efficient freq, we will turn off usage of efficient frequency and honor the user request. When a higher value is written, it will get toggled back again. The patch also corrects the register which needs to be read for obtaining the correct efficient frequency for Gen9+. We see much better perf numbers with benchmarks like glmark2 with efficient frequency usage enabled as expected. BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/5468 Cc: Rodrigo Vivi First of all, sorry for looking at the old patch first... I was delayed in my inbox flow. 
Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 66 +++-- drivers/gpu/drm/i915/intel_mchbar_regs.h| 3 + 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index c7d381ad90cf..281a086fc265 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1108,6 +1108,9 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c } else { caps->rp0_freq = (rp_state_cap >> 0) & 0xff; caps->rp1_freq = (rp_state_cap >> 8) & 0xff; + caps->rp1_freq = REG_FIELD_GET(RPE_MASK, + intel_uncore_read(to_gt(i915)->uncore, + GEN10_FREQ_INFO_REC)); This register is only gen10+ while the func is gen6+. either we handle the platform properly or we create a new rpe_freq tracker somewhere and if that's available we use this rpe, otherwise we use the hw fused rp1 which is a good enough, but it is not the actual one resolved by pcode, like this new RPe one. sure. caps->min_freq = (rp_state_cap >> 16) & 0xff; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index e1fa1f32f29e..70a2af5f518d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -465,6 +465,29 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) I know this code was already there, but I do have some questions around this and maybe we can simplify now that are touching this function. +{ + int ret = 0; + + if (ignore) { + ret = slpc_set_param(slpc, +SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, +ignore); + if (!ret) + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); why do we need to touch this min request here? true, not needed anymore. 
+ } else { + ret = slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); do we really need the unset param? for me using set_param(SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, freq < rpe_freq) was enough... Yup, removed this helper function as discussed. Thanks, Vinay. + if (!ret) + return slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } + + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. @@ -491,6 +514,14 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Ignore efficient freq if lower min freq is requested */ + ret = slpc_ignore_eff_freq(slpc, val < slpc->rp1_freq); + if (unlikely(ret)) { + i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", +ERR_PTR(ret)); + return ret; + } + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency
On 8/15/2022 9:51 AM, Rodrigo Vivi wrote: On Tue, Aug 09, 2022 at 05:03:06PM -0700, Vinay Belgaumkar wrote: Host Turbo operates at efficient frequency when GT is not idle unless the user or workload has forced it to a higher level. Replicate the same behavior in SLPC by allowing the algorithm to use efficient frequency. We had disabled it during boot due to concerns that it might break kernel ABI for min frequency. However, this is not the case, since SLPC will still abide by the (min,max) range limits and pcode forces frequency to 0 anyways when GT is in C6. We also see much better perf numbers with benchmarks like glmark2 with efficient frequency usage enabled. Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency limits") Signed-off-by: Vinay Belgaumkar I'm honestly surprised that our CI passed cleanly. What happens when the user requests both min and max < rpe? I'm sure that in this case GuC SLPC will put us to rpe ignoring our requests. Or is this good enough for the users expectation because of the soft limits showing the requested freq and we not asking to guc what it currently has as minimal? I just want to be sure that we are not causing any confusion for end users out there in the case they request some min/max below RPe and start seeing mismatches on the expectation because GuC is forcing the real min request to RPe. My suggestion is to ignore the RPe whenever we have a min request below it. So GuC respects our (and users) chosen min. And restore whenever min request is above rpe. Yup, I have already sent a patch yesterday with that change, doesn't look like CI has run on it yet. This was the old version. Thanks, Vinay. Thanks, Rodrigo. 
--- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 52 - 1 file changed, 52 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index e1fa1f32f29e..4b824da3048a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -137,17 +137,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -201,16 +190,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, - u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -597,29 +576,6 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } -static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) -{ - int ret = 0; - - if (ignore) { - ret = slpc_set_param(slpc, -SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, -ignore); - if (!ret) - return slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - slpc->min_freq); - } else { - ret = slpc_unset_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); - if (!ret) - return slpc_unset_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); - } - - return ret; -} - static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -679,14 +635,6 @@ int 
intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); - /* Ignore efficient freq and set min to platform min */ - ret = slpc_ignore_eff_freq(slpc, true); - if (unlikely(ret)) { - i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n", -ERR_PTR(ret)); - return ret; - } - /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { -- 2.35.1
Re: [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency
On 8/14/2022 4:46 PM, Vinay Belgaumkar wrote: Host Turbo operates at efficient frequency when GT is not idle unless the user or workload has forced it to a higher level. Replicate the same behavior in SLPC by allowing the algorithm to use efficient frequency. We had disabled it during boot due to concerns that it might break kernel ABI for min frequency. However, this is not the case since SLPC will still abide by the (min,max) range limits. With this change, min freq will be at efficient frequency level at init instead of fused min (RPn). If user chooses to reduce min freq below the efficient freq, we will turn off usage of efficient frequency and honor the user request. When a higher value is written, it will get toggled back again. The patch also corrects the register which needs to be read for obtaining the correct efficient frequency for Gen9+. We see much better perf numbers with benchmarks like glmark2 with efficient frequency usage enabled as expected. BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/5468 Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 66 +++-- drivers/gpu/drm/i915/intel_mchbar_regs.h| 3 + 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index c7d381ad90cf..281a086fc265 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1108,6 +1108,9 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c } else { caps->rp0_freq = (rp_state_cap >> 0) & 0xff; caps->rp1_freq = (rp_state_cap >> 8) & 0xff; Forgot to remove old code here. Will do so for the next revision as it does not affect the patch. Thanks, Vinay. 
+ caps->rp1_freq = REG_FIELD_GET(RPE_MASK, + intel_uncore_read(to_gt(i915)->uncore, + GEN10_FREQ_INFO_REC)); caps->min_freq = (rp_state_cap >> 16) & 0xff; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index e1fa1f32f29e..70a2af5f518d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -465,6 +465,29 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + int ret = 0; + + if (ignore) { + ret = slpc_set_param(slpc, +SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, +ignore); + if (!ret) + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + ret = slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + if (!ret) + return slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } + + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. 
@@ -491,6 +514,14 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Ignore efficient freq if lower min freq is requested */ + ret = slpc_ignore_eff_freq(slpc, val < slpc->rp1_freq); + if (unlikely(ret)) { + i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", +ERR_PTR(ret)); + return ret; + } + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -587,7 +618,9 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return ret; if (!slpc->min_freq_softlimit) { - slpc->min_freq_softlimit = slpc->min_freq; + ret = intel_guc_slpc_get_min_freq(slpc, >min_freq_softlimit); + if (unlikely(ret)) + return ret; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; } else if (slpc->min_freq_softlimit != slpc->min_freq) { return intel_guc_slpc_set_min_freq(slpc, @@ -597,29 +630,6 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } -static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) -{ - int
Re: [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest
On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote: On Thu, 23 Jun 2022 16:33:20 -0700, Vinay Belgaumkar wrote: +static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) +{ + struct intel_gt *gt = rps_to_gt(rps); + u32 perf_limit_reasons; + int err = 0; - igt_spinner_end(); - st_engine_heartbeat_enable(engine); - } + err = slpc_set_min_freq(slpc, slpc->rp0_freq); + if (err) + return err; - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + *max_act_freq = intel_rps_read_actual_frequency(rps); + if (!(*max_act_freq == slpc->rp0_freq)) { nit but '*max_act_freq != slpc->rp0_freq' + /* Check if there was some throttling by pcode */ + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); - /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); + /* If not, this is an error */ + if (!(perf_limit_reasons && GT0_PERF_LIMIT_REASONS_MASK)) { Still wrong, should be & not && + pr_err("Pcode did not grant max freq\n"); err = -EINVAL; - } + } else { + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); Another question, why are we using pr_err/info here rather than drm_err/info? pr_err/info is ok for mock selftests since there is no drm device but that is not the case here, I think this is done in other selftests too but maybe fix this as well if we are making so many changes here? Anyway can do later too. Yup, will send a separate patch to change them to drm_err/info. Thanks, Vinay. So let's settle issues in v2 thread first. Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest
On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote: On Thu, 23 Jun 2022 16:21:46 -0700, Belgaumkar, Vinay wrote: On 6/22/2022 1:32 PM, Dixit, Ashutosh wrote: On Fri, 10 Jun 2022 16:47:12 -0700, Vinay Belgaumkar wrote: This test will validate we can achieve actual frequency of RP0. Pcode grants frequencies based on what GuC is requesting. However, thermal throttling can limit what is being granted. Add a test to request for max, but don't fail the test if RP0 is not granted due to throttle reasons. Also optimize the selftest by using a common run_test function to avoid code duplication. The refactoring does change the order of operations (changing the freq vs spawning the spinner) but should be fine I think. Yes, we now start the spinner outside the for loop, so that freq changes occur quickly. This ensures we don't mess with SLPC algorithm's history by frequently restarting the WL in the for loop. Rename the "clamp" tests to vary_max_freq and vary_min_freq. Either is ok, but maybe "clamp" names were ok I think since they verify req freq is clamped at min/max. True, though clamp usually is associated with limiting, whereas we actually increase the min. 
v2: Fix compile warning Fixes 8ee2c227822e ("drm/i915/guc/slpc: Add SLPC selftest") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 1 file changed, 158 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index b768cea5943d..099129aae9a5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -8,6 +8,11 @@ #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1) #define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ GEN9_FREQ_SCALER) +enum test_type { + VARY_MIN, + VARY_MAX, + MAX_GRANTED +}; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) { @@ -36,147 +41,120 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } -static int live_slpc_clamp_min(void *arg) +static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) Please run checkpatch, indentation seems off. I had run it. Not sure why this wasn't caught. Need to use 'checkpatch --strict'. ok. { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc = >uc.guc.slpc; - struct intel_rps *rps = >rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; + u32 step, max_freq, req_freq; + u32 act_freq; u32 slpc_min_freq, slpc_max_freq; int err = 0; - if (!intel_uc_uses_guc_slpc(>uc)) - return 0; - - if (igt_spinner_init(, gt)) - return -ENOMEM; + slpc_min_freq = slpc->min_freq; + slpc_max_freq = slpc->rp0_freq; nit but we don't really need such variables since we don't change their values, we should just use slpc->min_freq, slpc->rp0_freq directly. I'd change this in all places in this patch. I will remove it from the sub-functions, but will need to keep the one in the main run_test(). We should query SLPC's min and max and then restore that at the end of the test. 
It is possible that SLPC's min is different from platform min for certain skus. Sorry, I am not following. The tests are varying freq between platform min to platform max correct? And platform min can be different from slpc min? So why don't the tests start at slpc min rather than platform min? Can't this return error? Will start the tests from platform min -> platform max, that way we remain consistent. And shouldn't slpc->min set to the real slpc min rather than to the platform min when slpc initializes (in intel_guc_slpc_enable() or slpc_get_rp_values())? (I am assuming the issue is only for the min and not the max but not sure). Certain conditions may result in SLPC setting the min to a different value. We can worry about that in a different patch. So I'd expect everywhere a consistent set of freq's be used, in run_test() and the actual vary_min/max_freq tests and also in the main driver. Agree. - if (intel_guc_slpc_get_max_freq(slpc, _max_freq)) { - pr_err("Could not get SLPC max freq\n"); - return -EIO; - } - - if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) { - pr_err("Could not get SLPC min freq\n"); - return -EIO; Why do we need these two function calls? Can't we just use slpc->rp0_freq and slpc->min_freq as we are doing in the vary_min/max_freq() functions above? Same as above. Also, as mentioned below I think here we should just do:
Re: [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest
On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote: On Thu, 23 Jun 2022 16:33:20 -0700, Vinay Belgaumkar wrote: +static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) +{ + struct intel_gt *gt = rps_to_gt(rps); + u32 perf_limit_reasons; + int err = 0; - igt_spinner_end(); - st_engine_heartbeat_enable(engine); - } + err = slpc_set_min_freq(slpc, slpc->rp0_freq); + if (err) + return err; - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + *max_act_freq = intel_rps_read_actual_frequency(rps); + if (!(*max_act_freq == slpc->rp0_freq)) { nit but '*max_act_freq != slpc->rp0_freq' ok. + /* Check if there was some throttling by pcode */ + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); - /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); + /* If not, this is an error */ + if (!(perf_limit_reasons && GT0_PERF_LIMIT_REASONS_MASK)) { Still wrong, should be & not && yup, third time's the charm. + pr_err("Pcode did not grant max freq\n"); err = -EINVAL; - } + } else { + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); Another question, why are we using pr_err/info here rather than drm_err/info? pr_err/info is ok for mock selftests since there is no drm device but that is not the case here, I think this is done in other selftests too but maybe fix this as well if we are making so many changes here? Anyway can do later too. So let's settle issues in v2 thread first. Thanks, Vinay. Thanks. -- Ashutosh
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest
On 6/22/2022 1:32 PM, Dixit, Ashutosh wrote: On Fri, 10 Jun 2022 16:47:12 -0700, Vinay Belgaumkar wrote: This test will validate we can achieve actual frequency of RP0. Pcode grants frequencies based on what GuC is requesting. However, thermal throttling can limit what is being granted. Add a test to request for max, but don't fail the test if RP0 is not granted due to throttle reasons. Also optimize the selftest by using a common run_test function to avoid code duplication. The refactoring does change the order of operations (changing the freq vs spawning the spinner) but should be fine I think. Yes, we now start the spinner outside the for loop, so that freq changes occur quickly. This ensures we don't mess with SLPC algorithm's history by frequently restarting the WL in the for loop. Rename the "clamp" tests to vary_max_freq and vary_min_freq. Either is ok, but maybe "clamp" names were ok I think since they verify req freq is clamped at min/max. True, though clamp usually is associated with limiting, whereas we actually increase the min. 
v2: Fix compile warning Fixes 8ee2c227822e ("drm/i915/guc/slpc: Add SLPC selftest") Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 1 file changed, 158 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index b768cea5943d..099129aae9a5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -8,6 +8,11 @@ #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1) #define FREQUENCY_REQ_UNITDIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ GEN9_FREQ_SCALER) +enum test_type { + VARY_MIN, + VARY_MAX, + MAX_GRANTED +}; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) { @@ -36,147 +41,120 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } -static int live_slpc_clamp_min(void *arg) +static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) Please run checkpatch, indentation seems off. I had run it. Not sure why this wasn't caught. { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc = >uc.guc.slpc; - struct intel_rps *rps = >rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; + u32 step, max_freq, req_freq; + u32 act_freq; u32 slpc_min_freq, slpc_max_freq; int err = 0; - if (!intel_uc_uses_guc_slpc(>uc)) - return 0; - - if (igt_spinner_init(, gt)) - return -ENOMEM; + slpc_min_freq = slpc->min_freq; + slpc_max_freq = slpc->rp0_freq; nit but we don't really need such variables since we don't change their values, we should just use slpc->min_freq, slpc->rp0_freq directly. I'd change this in all places in this patch. I will remove it from the sub-functions, but will need to keep the one in the main run_test(). We should query SLPC's min and max and then restore that at the end of the test. 
It is possible that SLPC's min is different from platform min for certain skus. - if (intel_guc_slpc_get_max_freq(slpc, _max_freq)) { - pr_err("Could not get SLPC max freq\n"); - return -EIO; - } - - if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) { - pr_err("Could not get SLPC min freq\n"); - return -EIO; Why do we need these two function calls? Can't we just use slpc->rp0_freq and slpc->min_freq as we are doing in the vary_min/max_freq() functions above? Same as above. Also, as mentioned below I think here we should just do: slpc_set_max_freq(slpc, slpc->rp0_freq); slpc_set_min_freq(slpc, slpc->min_freq); to restore freq to a known state before starting the test (just in case a previous test changed the values). Any test that changes the frequencies should restore them as well. - } - - if (slpc_min_freq == slpc_max_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; What if they are actually equal? I think basically the max/min freq test loops will just not be entered (so effectively the tests will just skip). The granted freq test will be fine. So I think we can just delete this if statement? (It is showing deleted above in the patch but is in the new code somewhere too). Actually, we should set it to min/rp0 if this is the case. That change will be in a separate patch. This is needed for certain cases. - } - - intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); - for_each_engine(engine, gt, id) { - struct i915_request *rq; - u32
Re: [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote: On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI. This is seen when waitboosting happens during a stress test. This patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted. Overall I am ok moving waitboost to use the non-blocking H2G. We can consider increasing the timeout in wait_for_ct_request_update() to be a separate issue for blocking cases and we can handle that separately. Still, there are a couple of issues with this patch mentioned below. v2: Use drm_notice to report any errors that might occur while sending the waitboost H2G request (Tvrtko) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 + 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 1db833da42df..e5e869c96262 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param_nb(guc, id, value); +} + static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) { u32 request[] = { @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) */ with_intel_runtime_pm(>runtime_pm, wakeref) { - ret = slpc_set_param(slpc, -SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, -freq); - if (ret) - i915_probe_error(i915, "Unable to force min freq to %u: %d", -freq, ret); + /* Non-blocking request will avoid stalls */ + ret = slpc_set_param_nb(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); } return ret; @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int err; /* * Raise min freq to boost. It's possible that @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) */ mutex_lock(>lock); if (atomic_read(>num_waiters)) { - slpc_force_min_freq(slpc, slpc->boost_freq); - slpc->num_boosts++; + err = slpc_force_min_freq(slpc, slpc->boost_freq); + if (!err) + slpc->num_boosts++; + else + drm_notice(>drm, "Failed to send waitboost request (%d)\n", + err); The issue I have is what happens when we de-boost (restore min freq to its previous value in intel_guc_slpc_dec_waiters()). It would seem that that call is fairly important to get the min freq down when there are no pending requests. Therefore what do we do in that case? This is the function: void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) { mutex_lock(>lock); if (atomic_dec_and_test(>num_waiters)) slpc_force_min_freq(slpc, slpc->min_freq_softlimit); mutex_unlock(>lock); } 1. 
First it would seem that at the minimum we need a similar drm_notice() in intel_guc_slpc_dec_waiters(). That would mean we need to put the drm_notice() back in slpc_force_min_freq() (replacing i915_probe_error()) rather than in slpc_boost_work() above? Sure. 2. Further, if de-boosting is important then maybe as was being discussed in v1 of this patch (see the bottom of https://patchwork.freedesktop.org/patch/485004/?series=103598=1) do we need to use intel_guc_send_busy_loop() in the intel_guc_slpc_dec_waiters() code path? Using a busy_loop here would
Re: [PATCH] drm/i915: Add global forcewake status to drpc
On 6/17/2022 1:53 PM, Dixit, Ashutosh wrote: On Fri, 17 Jun 2022 13:25:34 -0700, Vinay Belgaumkar wrote: We have seen multiple RC6 issues where it is useful to know which global forcewake bits are set. Add this to the 'drpc' debugfs output. A couple of optional nits below to look at but otherwise this is: Reviewed-by: Ashutosh Dixit +static u32 mt_fwake_status(struct intel_uncore *uncore) +{ + return intel_uncore_read_fw(uncore, FORCEWAKE_MT); +} + static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; struct intel_uncore *uncore = gt->uncore; - u32 rcctl1, pw_status; + u32 rcctl1, pw_status, mt_fwake; + mt_fwake = mt_fwake_status(uncore); I would get rid of the function and just duplicate the intel_uncore_read_fw(). Made it a function in case we can find the equivalent register for ILK. Though, I am not sure if ILK even had the concept of MT fwake. pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); seq_printf(m, "RC6 Enabled: %s\n", str_yes_no(rcctl1 & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Multi-threaded Forcewake: 0x%x\n", mt_fwake); Is "Multi-threaded Forcewake Request" (the Bspec register name) a more descriptive print? Same for gen6_drpc() below. Thanks! Sure. Thanks, Vinay.
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
On 5/6/2022 9:43 AM, John Harrison wrote: On 5/6/2022 00:18, Tvrtko Ursulin wrote: On 05/05/2022 19:36, John Harrison wrote: On 5/5/2022 10:21, Belgaumkar, Vinay wrote: On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote: On 05/05/2022 06:40, Vinay Belgaumkar wrote: SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI. Is it the "Unable to force min freq" error? Do you have a link to the GitLab issue to add to commit message? We don't have a specific error for this one, but have seen similar issues with other H2G which are blocking. This is seen when waitboosting happens during a stress test. This patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted. AFAIU with this approach, when CT channel is congested, you instead achieve silent dropping of the waitboost request, right? We are hoping it makes it, but just not waiting for it to complete. We are not 'hoping it makes it'. We know for a fact that it will make it. We just don't know when. The issue is not about whether the waitboost request itself gets dropped/lost it is about the ack that comes back. The GuC will process the message and it will send an ack. It's just a question of whether the i915 driver has given up waiting for it yet. And if it has, then you get the initial 'timed out waiting for ack' followed by a later 'got unexpected ack' message. Whereas, if we make the call asynchronous, there is no ack. i915 doesn't bother waiting and it won't get surprised later. Also, note that this is only an issue when GuC itself is backed up. Normally that requires the creation/destruction of large numbers of contexts in rapid succession (context management is about the slowest thing we do with GuC). Some of the IGTs and selftests do that with thousands of contexts all at once. 
Those are generally where we see this kind of problem. It would be highly unlikely (but not impossible) to hit it in real world usage. Goto -> The general design philosophy of H2G messages is that asynchronous mode should be used for everything if at all possible. It is fire and forget and will all get processed in the order sent (same as batch buffer execution, really). Synchronous messages should only be used when an ack/status is absolutely required. E.g. start of day initialisation or things like TLB invalidation where we need to know that a cache has been cleared/flushed before updating memory from the CPU. John. It sounds like a potentially important feedback from the field to lose so easily. How about you added drm_notice to the worker when it fails? Or simply a "one line patch" to replace i915_probe_error (!?) with drm_notice and keep the blocking behavior. (I have no idea what is the typical time to drain the CT buffer, and so to decide whether waiting or dropping makes more sense for effectiveness of waitboosting.) Or since the congestion /should not/ happen in production, then the argument is why complicate with more code, in which case going with one line patch is an easy way forward? Here. Where I did hint I understood the "should not happen in production angle". So statement is GuC is congested in processing requests, but the h2g buffer is not congested so no chance intel_guc_send_nb() will fail with no space in that buffer? Sounds a bit un-intuitive. That's two different things. The problem of no space in the H2G buffer is the same whether the call is sent blocking or non-blocking. The wait-for-space version is intel_guc_send_busy_loop() rather than intel_guc_send_nb(). NB: _busy_loop is a wrapper around _nb, so the wait-for-space version is also non-blocking ;). If a non-looping version is used (blocking or otherwise) it will return -EBUSY if there is no space. 
So both the original SLPC call and this non-blocking version will still get an immediate EBUSY return code if the H2G channel is backed up completely. Whether the code should be handling EBUSY or not is another matter. Vinay, does anything higher up do a loop on EBUSY? If not, maybe it should be using the _busy_loop() call instead? The blocking vs non-blocking is about waiting for a response if the command is successfully sent. The blocking case will sit and spin for a reply, the non-blocking assumes success and expects an asynchronous error report on failure. The assumption being that the call can't fail unless something is already broken - i915 sending invalid data to GuC for example. And thus any failure is in the BUG_ON category rather than the try again with a different approach and/or try again later category. This is the point of the change. We are currently getting timeout errors when the H2G channel has space so the command can be sent, but the channel already contains a lo
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
On 5/6/2022 12:18 AM, Tvrtko Ursulin wrote: On 05/05/2022 19:36, John Harrison wrote: On 5/5/2022 10:21, Belgaumkar, Vinay wrote: On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote: On 05/05/2022 06:40, Vinay Belgaumkar wrote: SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI. Is it the "Unable to force min freq" error? Do you have a link to the GitLab issue to add to commit message? We don't have a specific error for this one, but have seen similar issues with other H2G which are blocking. This is seen when waitboosting happens during a stress test. this patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted. AFAIU with this approach, when CT channel is congested, you instead achieve silent dropping of the waitboost request, right? We are hoping it makes it, but just not waiting for it to complete. We are not 'hoping it makes it'. We know for a fact that it will make it. We just don't know when. The issue is not about whether the waitboost request itself gets dropped/lost it is about the ack that comes back. The GuC will process the message and it will send an ack. It's just a question of whether the i915 driver has given up waiting for it yet. And if it has, then you get the initial 'timed out waiting for ack' followed by a later 'got unexpected ack' message. Whereas, if we make the call asynchronous, there is no ack. i915 doesn't bother waiting and it won't get surprised later. Also, note that this is only an issue when GuC itself is backed up. Normally that requires the creation/destruction of large numbers of contexts in rapid succession (context management is about the slowest thing we do with GuC). Some of the IGTs and selftests do that with thousands of contexts all at once. Those are generally where we see this kind of problem. 
It would be highly unlikely (but not impossible) to hit it in real world usage. Goto -> The general design philosophy of H2G messages is that asynchronous mode should be used for everything if at all possible. It is fire and forget and will all get processed in the order sent (same as batch buffer execution, really). Synchronous messages should only be used when an ack/status is absolutely required. E.g. start of day initialisation or things like TLB invalidation where we need to know that a cache has been cleared/flushed before updating memory from the CPU. John. It sounds like a potentially important feedback from the field to lose so easily. How about you added drm_notice to the worker when it fails? Or simply a "one line patch" to replace i915_probe_error (!?) with drm_notice and keep the blocking behavior. (I have no idea what is the typical time to drain the CT buffer, and so to decide whether waiting or dropping makes more sense for effectiveness of waitboosting.) Or since the congestion /should not/ happen in production, then the argument is why complicate with more code, in which case going with one line patch is an easy way forward? Here. Where I did hint I understood the "should not happen in production angle". So statement is GuC is congested in processing requests, but the h2g buffer is not congested so no chance intel_guc_send_nb() will fail with no space in that buffer? Sounds a bit un-intuitive. Anyway, it sounds okay to me to use the non-blocking, but I would like to see some logging if the unexpected does happen. Hence I was suggesting the option of adding drm_notice logging if the send fails from the worker. (Because I think other callers would already propagate the error, like sysfs.) err = slpc_force_min_freq(slpc, slpc->boost_freq); if (!err) slpc->num_boosts++; else drm_notice(... "Failed to send waitboost request (%d)", err); Ok, makes sense. Will send out another rev with this change. Thanks, Vinay. Something like that. 
Regards, Tvrtko Even if we soften the blow here, the actual timeout error occurs in the intel_guc_ct.c code, so we cannot hide that error anyways. Making this call non-blocking will achieve both things. Thanks, Vinay. Regards, Tvrtko Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 - 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 1db833da42df..c852f73cf521 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) +{ + u32 reques
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost
On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote: On 05/05/2022 06:40, Vinay Belgaumkar wrote: SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI. Is it the "Unable to force min freq" error? Do you have a link to the GitLab issue to add to commit message? We don't have a specific error for this one, but have seen similar issues with other H2G which are blocking. This is seen when waitboosting happens during a stress test. this patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted. AFAIU with this approach, when CT channel is congested, you instead achieve silent dropping of the waitboost request, right? We are hoping it makes it, but just not waiting for it to complete. It sounds like a potentially important feedback from the field to lose so easily. How about you added drm_notice to the worker when it fails? Or simply a "one line patch" to replace i915_probe_error (!?) with drm_notice and keep the blocking behavior. (I have no idea what is the typical time to drain the CT buffer, and so to decide whether waiting or dropping makes more sense for effectiveness of waitboosting.) Or since the congestion /should not/ happen in production, then the argument is why complicate with more code, in which case going with one line patch is an easy way forward? Even if we soften the blow here, the actual timeout error occurs in the intel_guc_ct.c code, so we cannot hide that error anyways. Making this call non-blocking will achieve both things. Thanks, Vinay. 
Regards, Tvrtko Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 - 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 1db833da42df..c852f73cf521 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param_nb(guc, id, value); +} + static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) { u32 request[] = { @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) */ with_intel_runtime_pm(>runtime_pm, wakeref) { - ret = slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - freq); - if (ret) - i915_probe_error(i915, "Unable to force min freq to %u: %d", - freq, ret); + /* Non-blocking request will avoid stalls */ + ret = slpc_set_param_nb(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); } return ret; @@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct *work) */ mutex_lock(>lock); if (atomic_read(>num_waiters)) { - slpc_force_min_freq(slpc, slpc->boost_freq); - slpc->num_boosts++; + if (!slpc_force_min_freq(slpc, slpc->boost_freq)) + slpc->num_boosts++; } mutex_unlock(>lock); }
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use i915_probe_error instead of drm_err
On 4/13/2022 11:41 PM, Anshuman Gupta wrote: On 2022-04-13 at 04:18:52 +0530, Vinay Belgaumkar wrote: This will ensure we don't have false positives when we run error injection tests. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 42 ++--- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b170238aa15c..639de3c10545 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -152,8 +152,8 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) ret = guc_action_slpc_query(guc, offset); if (unlikely(ret)) As commit logs describe, this code patch can hit, when we run error injection test. Do we need unlikely() here? Br, Anshuman Gupta. I think we still need the unlikely(). Majority of the time, we still need the compiler optimization. Only in the rare case of running the error injection test will it not be needed. Thanks, Vinay. 
- drm_err(>drm, "Failed to query task state (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to query task state (%pe)\n", +ERR_PTR(ret)); drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); @@ -170,8 +170,8 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) ret = guc_action_slpc_set_param(guc, id, value); if (ret) - drm_err(>drm, "Failed to set param %d to %u (%pe)\n", - id, value, ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set param %d to %u (%pe)\n", +id, value, ERR_PTR(ret)); return ret; } @@ -211,8 +211,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, freq); if (ret) - drm_err(>drm, "Unable to force min freq to %u: %d", - freq, ret); + i915_probe_error(i915, "Unable to force min freq to %u: %d", +freq, ret); } return ret; @@ -247,9 +247,9 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) err = intel_guc_allocate_and_map_vma(guc, size, >vma, (void **)>vaddr); if (unlikely(err)) { - drm_err(>drm, - "Failed to allocate SLPC struct (err=%pe)\n", - ERR_PTR(err)); + i915_probe_error(i915, +"Failed to allocate SLPC struct (err=%pe)\n", +ERR_PTR(err)); return err; } @@ -316,15 +316,15 @@ static int slpc_reset(struct intel_guc_slpc *slpc) ret = guc_action_slpc_reset(guc, offset); if (unlikely(ret < 0)) { - drm_err(>drm, "SLPC reset action failed (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "SLPC reset action failed (%pe)\n", +ERR_PTR(ret)); return ret; } if (!ret) { if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) { - drm_err(>drm, "SLPC not enabled! State = %s\n", - slpc_get_state_string(slpc)); + i915_probe_error(i915, "SLPC not enabled! 
State = %s\n", +slpc_get_state_string(slpc)); return -EIO; } } @@ -616,8 +616,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) ret = slpc_reset(slpc); if (unlikely(ret < 0)) { - drm_err(>drm, "SLPC Reset event returned (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "SLPC Reset event returned (%pe)\n", +ERR_PTR(ret)); return ret; } @@ -632,24 +632,24 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Ignore efficient freq and set min to platform min */ ret = slpc_ignore_eff_freq(slpc, true); if (unlikely(ret)) { - drm_err(>drm, "Failed to set SLPC min to RPn (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n", +ERR_PTR(ret)); return ret; } /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { - drm_err(>drm, "Failed to set SLPC max to RP0 (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set SLPC max to RP0 (%pe)\n", +
Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Correct the param count for unset param
On 2/17/2022 1:41 AM, Tvrtko Ursulin wrote: On 16/02/2022 18:15, Vinay Belgaumkar wrote: SLPC unset param H2G only needs one parameter - the id of the param. Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency limits") How serious is this? Does it need backporting? If so: Cc: # v5.15+ ? This path (unset_param) is not being exercised currently, so not very serious. Thanks, Vinay. Regards, Tvrtko Suggested-by: Umesh Nerlige Ramappa Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 13b27b8ff74e..ba21ace973da 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -110,7 +110,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) { u32 request[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), id, };
Re: [PATCH] drm/i915/guc: Update guc shim control programming on newer platforms
On 1/20/2022 2:24 PM, Daniele Ceraolo Spurio wrote: Starting from xehpsdv, bit 0 of the GuC shim control register has been repurposed, while bit 2 is now reserved, so we need to avoid setting those for their old meaning on newer platforms. Cc: Vinay Belgaumkar Cc: Stuart Summers Signed-off-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index f773e7f35bc1a..40f7d4779c9ec 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -15,13 +15,15 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) { - u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES | -GUC_ENABLE_READ_CACHE_LOGIC | -GUC_ENABLE_MIA_CACHING | + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | GUC_ENABLE_MIA_CLOCK_GATING; + if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50)) + shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_MIA_CACHING; + LGTM. Reviewed-by: Vinay Belgaumkar /* Must program this register before loading the ucode with DMA */ intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
Re: [PATCH 3/3] drm/i915/gt: Improve "race-to-idle" at low frequencies
On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote: From: Chris Wilson While the power consumption is proportional to the frequency, there is also a static draw for active gates. The longer we are able to powergate (rc6), the lower the static draw. Thus there is a sweetspot in the frequency/power curve where we run at higher frequency in order to sleep longer, aka race-to-idle. This is more evident at lower frequencies, so let's look to bump the frequency if we think we will benefit by sleeping longer at the higher frequency and so conserving power. Signed-off-by: Chris Wilson Cc: Vinay Belgaumkar Cc: Tvrtko Ursulin Data collected does show some power savings. Reviewed-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 31 - 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 3675ac93ded0..6af3231982af 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -63,6 +63,22 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) intel_uncore_write_fw(uncore, reg, val); } +static bool race_to_idle(struct intel_rps *rps, u64 busy, u64 dt) +{ + unsigned int this = rps->cur_freq; + unsigned int next = rps->cur_freq + 1; + u64 next_dt = next * max(busy, dt); + + /* +* Compare estimated time spent in rc6 at the next power bin. If +* we expect to sleep longer than the estimated increased power +* cost of running at a higher frequency, it will be reduced power +* consumption overall. 
+*/ + return (((next_dt - this * busy) >> 10) * this * this > + ((next_dt - next * busy) >> 10) * next * next); +} + static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); @@ -133,7 +149,7 @@ static void rps_timer(struct timer_list *t) if (!max_busy[i]) break; - busy += div_u64(max_busy[i], 1 << i); + busy += max_busy[i] >> i; } GT_TRACE(rps_to_gt(rps), "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", @@ -141,13 +157,18 @@ static void rps_timer(struct timer_list *t) max_busy[0], max_busy[1], max_busy[2], rps->pm_interval); - if (100 * busy > rps->power.up_threshold * dt && - rps->cur_freq < rps->max_freq_softlimit) { + if (rps->cur_freq < rps->max_freq_softlimit && + race_to_idle(rps, max_busy[0], dt)) { + rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; + rps->pm_interval = 1; + schedule_work(>work); + } else if (rps->cur_freq < rps->max_freq_softlimit && + 100 * busy > rps->power.up_threshold * dt) { rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; rps->pm_interval = 1; schedule_work(>work); - } else if (100 * busy < rps->power.down_threshold * dt && - rps->cur_freq > rps->min_freq_softlimit) { + } else if (rps->cur_freq > rps->min_freq_softlimit && + 100 * busy < rps->power.down_threshold * dt) { rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; rps->pm_interval = 1; schedule_work(>work);
Re: [PATCH 2/3] drm/i915/gt: Compare average group occupancy for RPS evaluation
On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote: From: Chris Wilson Currently, we inspect each engine individually and measure the occupancy of that engine over the last evaluation interval. If that exceeds our busyness thresholds, we decide to increase the GPU frequency. However, under a load balancer, we should consider the occupancy of entire engine groups, as work may be spread out across the group. In doing so, we prefer wide over fast, as power consumption is approximately proportional to the square of the frequency. However, since the load balancer is greedy, the first idle engine gets all the work, and preferentially reuses the last active engine, under light loads all work is assigned to one engine, and so that engine appears very busy. But if the work happened to overlap slightly, the workload would spread across multiple engines, reducing each individual engine's runtime, and so reducing the rps contribution, keeping the frequency low. Instead, when considering the contribution, consider the contribution over the entire engine group (capacity). 
Signed-off-by: Chris Wilson Cc: Vinay Belgaumkar Cc: Tvrtko Ursulin Reviewed-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 48 - 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 07ff7ba7b2b7..3675ac93ded0 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_breadcrumbs.h" +#include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" @@ -65,26 +66,45 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); - struct intel_engine_cs *engine; - ktime_t dt, last, timestamp; - enum intel_engine_id id; + struct intel_gt *gt = rps_to_gt(rps); + ktime_t dt, last, timestamp = 0; s64 max_busy[3] = {}; + int i, j; - timestamp = 0; - for_each_engine(engine, rps_to_gt(rps), id) { - s64 busy; - int i; + /* Compare average occupancy over each engine group */ + for (i = 0; i < ARRAY_SIZE(gt->engine_class); i++) { + s64 busy = 0; + int count = 0; + + for (j = 0; j < ARRAY_SIZE(gt->engine_class[i]); j++) { + struct intel_engine_cs *engine; - dt = intel_engine_get_busy_time(engine, ); - last = engine->stats.rps; - engine->stats.rps = dt; + engine = gt->engine_class[i][j]; + if (!engine) + continue; - busy = ktime_to_ns(ktime_sub(dt, last)); - for (i = 0; i < ARRAY_SIZE(max_busy); i++) { - if (busy > max_busy[i]) - swap(busy, max_busy[i]); + dt = intel_engine_get_busy_time(engine, ); + last = engine->stats.rps; + engine->stats.rps = dt; + + if (!intel_engine_pm_is_awake(engine)) + continue; + + busy += ktime_to_ns(ktime_sub(dt, last)); + count++; + } + + if (count > 1) + busy = div_u64(busy, count); + if (busy <= max_busy[ARRAY_SIZE(max_busy) - 1]) + continue; + + for (j = 0; j < ARRAY_SIZE(max_busy); j++) { + if (busy > 
max_busy[j]) + swap(busy, max_busy[j]); } } + last = rps->pm_timestamp; rps->pm_timestamp = timestamp;
Re: [PATCH 1/3] drm/i915/guc/slpc: Define and initialize boost frequency
On 11/1/2021 1:26 PM, Dixit, Ashutosh wrote: On Sun, 31 Oct 2021 21:39:35 -0700, Belgaumkar, Vinay wrote: Define helpers and struct members required to record boost info. Boost frequency is initialized to RP0 at SLPC init. Also define num_waiters which can track the pending boost requests. Boost will be done by scheduling a worker thread. This will allow us to make H2G calls inside an interrupt context. Initialize the "to not make H2G calls from interrupt context" is probably better. +static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + lockdep_assert_held(>lock); + + /** nit: this I believe should just be /* ok. /** I believe shows up in kerneldoc so shouldn't be used unless we want something in kerneldoc. +* This function is a little different as compared to +* intel_guc_slpc_set_min_freq(). Softlimit will not be updated +* here since this is used to temporarily change min freq, +* for example, during a waitboost. Caller is responsible for +* checking bounds. +*/ + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, +SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, +freq); + if (ret) + drm_err(>drm, "Unable to force min freq to %u: %d", Probably drm_err_ratelimited since it's called at run time not only at init? Not sure if drm_err_once suffizes, probably not. Keeping it drm_err as discussed offline. + freq, ret); + } + + return ret; +} + +static void slpc_boost_work(struct work_struct *work) +{ + struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + + /* Raise min freq to boost. It's possible that +* this is greater than current max. But it will +* certainly be limited by RP0. An error setting +* the min param is not fatal. +*/ nit: do we follow the following format for multi-line comments, Documentation/process/coding-style.rst mentions this: /* * Line 1 * Line 2 */ Ok. Thanks, Vinay.
Re: [PATCH 2/3] drm/i915/guc/slpc: Add waitboost functionality for SLPC
On 11/1/2021 1:28 PM, Dixit, Ashutosh wrote: On Sun, 31 Oct 2021 21:39:36 -0700, Belgaumkar, Vinay wrote: @@ -945,6 +960,17 @@ void intel_rps_boost(struct i915_request *rq) if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) { struct intel_rps *rps = _ONCE(rq->engine)->gt->rps; + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + /* Return if old value is non zero */ + if (atomic_fetch_inc(>num_waiters)) + return; + + if (intel_rps_get_requested_frequency(rps) < slpc->boost_freq) I think this check is not needed because: a. The waitboost code only changes min_freq. i915 code should not depend on how GuC changes requested_freq in response to change in min_freq. b. What is more worrisome is that when we "de-boost" we set min_freq to min_freq_softlimit. If GuC e.g. has a delay in bringing requested_freq down and intel_rps_boost() gets called meanwhile we will miss the one opportunity we have to boost the freq (when num_waiters goes from 0 to 1. Asking GuC to boost when actual_freq is already boost_freq is harmless in comparison). So to avoid this risk of missing the chance to boost I think we should delete this check and replace the code above with something like: if (rps_uses_slpc(rps)) { struct intel_guc_slpc *slpc = rps_to_slpc(rps); if (slpc->boost_freq <= slpc->min_freq_softlimit) return; if (!atomic_fetch_inc(>num_waiters)) schedule_work(>boost_work); return; } Note that this check: if (slpc->boost_freq <= slpc->min_freq_softlimit) return; (which is basically a degenerate case in which we don't have to do anything), can be probably be implemented when boost_freq is set in sysfs, or may already be encompassed in "val < slpc->min_freq" in intel_guc_slpc_set_boost_freq() in which case this check can also be skipped from this function. We already have that check in set_boost_freq function. So, just adding the atomic_fetch_inc check. +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) +{ + /* Return min back to the softlimit. 
+* This is called during request retire, +* so we don't need to fail that if the +* set_param fails. +*/ nit: maybe follow kernel multi-line comment format. Ok. Thanks, Vinay.
Re: [PATCH v2 0/3] drm/i915/guc/slpc: Implement waitboost for SLPC
On 11/1/2021 1:24 PM, Dixit, Ashutosh wrote: On Sun, 31 Oct 2021 21:39:34 -0700, Belgaumkar, Vinay wrote: Waitboost is a legacy feature implemented in the Host Turbo algorithm. This patch set implements it for the SLPC path. A "boost" happens when user calls gem_wait ioctl on a submission that has not landed on HW yet. Afaiu user doesn't have to call gem_wait, the boost will happen whenever a request waits to be submitted to GuC because of an unmet dependency. This has to be done from i915 because GuC has not yet seen the request. Rest of the cover letter is fine. Ok, thanks, Vinay.
Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls
On 9/14/2021 12:51 PM, Lucas De Marchi wrote: The clflush calls here aren't doing anything since we are not writing something and flushing the cache lines to be visible to GuC. Here the intention seems to be to make sure whatever GuC has written is visible to the CPU before we read them. However a clflush from the CPU side is the wrong instruction to use. From code inspection on the other clflush() calls in i915/gt/uc/ these are the only ones with this behavior. The others are apparently making sure what we write is visible to GuC. Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..2e996b77df80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); data = slpc->vaddr; return data->header.global_state; @@ -172,8 +171,6 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) drm_err(>drm, "Failed to query task state (%pe)\n", ERR_PTR(ret)); - drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); - LGTM. Reviewed-by: Vinay Belgaumkar return ret; }
Re: [Intel-gfx] [PATCH 13/14] drm/i915/guc/slpc: Add SLPC selftest
On 7/29/2021 4:40 PM, Matthew Brost wrote: On Wed, Jul 28, 2021 at 02:11:43PM -0700, Vinay Belgaumkar wrote: Tests that exercise the SLPC get/set frequency interfaces. Clamp_max will set max frequency to multiple levels and check that SLPC requests frequency lower than or equal to it. Clamp_min will set min frequency to different levels and check if SLPC requests are higher or equal to those levels. v2: Address review comments (Michal W) v3: Checkpatch() corrections v4: Remove unnecessary header file (Matthew Brost) Signed-off-by: Vinay Belgaumkar Overall looks but need to address the checkpatch warnings, have question / nit below. --- drivers/gpu/drm/i915/gt/intel_rps.c | 1 + drivers/gpu/drm/i915/gt/selftest_slpc.c | 309 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 311 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.c diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 49db8ed9f80d..7a2aa0031cf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2330,4 +2330,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_rps.c" +#include "selftest_slpc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c new file mode 100644 index ..119d012d1e1e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#define NUM_STEPS 5 +#define H2G_DELAY 5 +#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1) + +static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_min_freq(slpc, freq); + if (ret) + pr_err("Could not set min frequency to [%u]\n", freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int slpc_set_max_freq(struct 
intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_max_freq(slpc, freq); + if (ret) + pr_err("Could not set maximum frequency [%u]\n", + freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +int live_slpc_clamp_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = >gt; + struct intel_guc_slpc *slpc = >uc.guc.slpc; + struct intel_rps *rps = >rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + u32 slpc_min_freq, slpc_max_freq; + int err = 0; + + if (!intel_uc_uses_guc_slpc(>uc)) + return 0; + + if (igt_spinner_init(, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, _max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 step, min_freq, req_freq; + u32 act_freq, max_act_freq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from min to max in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(); +
Re: [Intel-gfx] [PATCH 11/14] drm/i915/guc/slpc: Cache platform frequency limits
On 7/29/2021 9:21 AM, Michal Wajdeczko wrote: On 28.07.2021 23:11, Vinay Belgaumkar wrote: Cache rp0, rp1 and rpn platform limits into SLPC structure for range checking while setting min/max frequencies. Also add "soft" limits which keep track of frequency changes made from userland. These are initially set to platform min and max. v2: Address review comments (Michal W) v3: Formatting (Michal W) v4: Add separate function to parse rp values (Michal W) v5: Perform range checking for set min/max (Michal W) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 115 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 9 ++ drivers/gpu/drm/i915/i915_reg.h | 3 + 3 files changed, 127 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 742918875593..bfd5fb0751fd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; shouldn't this be in intel_guc_slpc_init() ? No, we want to maintain softlimits across suspend resume. + return err; } @@ -124,6 +127,18 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? 
-EPROTO : ret; } +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -177,6 +192,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -307,6 +332,11 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) intel_wakeref_t wakeref; int ret; + if ((val < slpc->min_freq) || + (val > slpc->rp0_freq) || + (val < slpc->min_freq_softlimit)) + return -EINVAL; + with_intel_runtime_pm(>runtime_pm, wakeref) { ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, @@ -317,6 +347,8 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) ret = -EIO; } + slpc->max_freq_softlimit = val; + return ret; } @@ -363,6 +395,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) intel_wakeref_t wakeref; int ret; + if ((val < slpc->min_freq) || + (val > slpc->rp0_freq) || + (val > slpc->max_freq_softlimit)) + return -EINVAL; + with_intel_runtime_pm(>runtime_pm, wakeref) { ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, @@ -373,6 +410,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) ret = -EIO; } + slpc->min_freq_softlimit = val; + return ret; } @@ -418,6 +457,71 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); } +static int slpc_set_softlimits(struct intel_guc_slpc *slpc) 
+{ + int ret = 0; + + /* +* Softlimits are initially equivalent to platform limits +* unless they have deviated from defaults, in which case, +* we retain the values and set min/max accordingly. +*/ + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); if this fails, shouldn't we reset max_freq_softlimit to platform limit ? otherwise we could be with some potentially bad value forever Well, if this call fails, it's likely the next set_max_freq call will also fail, so not much point. Also, it will likely (?) just retain the old value, which is fine. + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) +
Re: [PATCH 13/15] drm/i915/guc/slpc: Sysfs hooks for SLPC
On 7/27/2021 9:59 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Update the get/set min/max freq hooks to work for SLPC case as well. Consolidate helpers for requested/min/max frequency get/set to intel_rps where the proper action can be taken depending on whether SLPC is enabled. v2: Add wrappers for getting rp0/1/n frequencies, update softlimits in set min/max SLPC functions. Also check for boundary conditions before setting them. v3: Address review comments (Michal W) Acked-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Tvrtko Ursulin Signed-off-by: Sujaritha Sundaresan --- drivers/gpu/drm/i915/gt/intel_rps.c | 165 drivers/gpu/drm/i915/gt/intel_rps.h | 11 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 ++ drivers/gpu/drm/i915/i915_pmu.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/i915_sysfs.c | 77 ++--- 6 files changed, 207 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e858eeb2c59d..48d4147165a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return >uc.guc.slpc; +} + static bool rps_uses_slpc(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); @@ -1960,6 +1967,164 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_punit_req(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + return intel_uncore_read(uncore, GEN6_RPNSWREQ); +} + +u32 intel_rps_get_req(struct intel_rps *rps, u32 pureq) hmm, "rps" looks to be not needed here btw, shouldn't this function be static ? sure. 
+{ + u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + + return req; +} + +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(rps, intel_rps_read_punit_req(rps)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + +u32 intel_rps_get_max_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->max_freq_softlimit; + else + return intel_gpu_freq(rps, rps->max_freq_softlimit); +} + +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp0_freq; + else + return intel_gpu_freq(rps, rps->rp0_freq); +} + +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp1_freq; + else + return intel_gpu_freq(rps, rps->rp1_freq); +} + +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq; + else + return intel_gpu_freq(rps, rps->min_freq); +} + +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + int ret = 0; + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_max_freq(slpc, val); few above functions are implemented as nice dispatcher if (rps_uses_slpc(rps)) return ... slpc stuff; else return ... gpu stuff; can we have something similar here ? likely just putting below code into helper will do the trick ok. 
+ + mutex_lock(>lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + if (val > rps->rp0_freq) + drm_dbg(>drm, "User requested overclocking to %d\n", + intel_gpu_freq(rps, val)); + + rps->max_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* +* We still need *_set_rps to process the new max_delay and +* update the interrupt limits and
Re: [Intel-gfx] [PATCH 08/15] drm/i915/guc/slpc: Add methods to set min/max frequency
On 7/27/2021 8:24 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add param set h2g helpers to set the min and max frequencies s/h2g/H2G for use by SLPC. v2: Address review comments (Michal W) v3: Check for positive error code (Michal W) Signed-off-by: Sundaresan Sujaritha Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 89 - drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index f5808d2acbca..63656640189c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -109,6 +109,21 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); @@ -118,7 +133,7 @@ static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) { u32 request[] = { INTEL_GUC_ACTION_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), this should be fixed in original patch offset, 0, }; @@ -146,6 +161,15 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) return ret; } +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param(guc, id, value); +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { const char *str = NULL; @@ -251,6 +275,69 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set max frequency unslice returned (%pe)\n", ERR_PTR(ret)); maybe generic error reporting could be moved to slpc_set_param() ? + /* Return standardized err code for sysfs */ + ret = -EIO; at this point we don't know if this function is for sysfs only I would sanitize error in "store" hook if really needed ssize_t slpc_max_freq_store(... const char *buf, size_t count) { ... 
err = intel_guc_slpc_set_max_freq(slpc, val); return err ? -EIO : count; that's the problem, sysfs wrapper will need to check for -EIO and -EINVAL, we want the ability to return either. Thanks, Vinay. } + } + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret; + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set min frequency for unslice returned (%pe)\n", ERR_PTR(ret)); + /* Return standardized err code for sysfs */ + ret = -EIO; + } + } same here
Re: [PATCH 12/15] drm/i915/guc/slpc: Cache platform frequency limits
On 7/27/2021 9:00 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Cache rp0, rp1 and rpn platform limits into SLPC structure for range checking while setting min/max frequencies. Also add "soft" limits which keep track of frequency changes made from userland. These are initially set to platform min and max. v2: Address review comments (Michal W) v3: Formatting (Michal W) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 97 +++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 11 +++ drivers/gpu/drm/i915/i915_reg.h | 3 + 3 files changed, 111 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index c79dba60b2e6..a98cbf274862 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + return err; } @@ -124,6 +127,18 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? 
-EPROTO : ret; } +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); @@ -170,6 +185,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return guc_action_slpc_set_param(guc, id, value); } +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { const char *str = NULL; @@ -406,6 +431,55 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); } +static int intel_guc_slpc_set_softlimits(struct intel_guc_slpc *slpc) nit: "intel_" prefix not needed for static function ok. +{ + int ret = 0; + + /* Softlimits are initially equivalent to platform limits +* unless they have deviated from defaults, in which case, +* we retain the values and set min/max accordingly. +*/ fix style for multi-line comment done. + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) + ret = intel_guc_slpc_set_min_freq(slpc, + slpc->min_freq_softlimit); + + return ret; +} + +static void intel_guc_slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + if (ignore) { + /* A failure here does not affect the algorithm in a fatal way */ is this comment just for "ignore" case or whole function ? 
(as you don't check for errors in "else" case anyway) moved it above. + slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + ignore); + slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } +} + +static void intel_guc_slpc_use_fused_rp0(struct intel_guc_slpc *slpc) +{ + /* Force slpc to used platform rp0 */ s/slpc/SLPC ok. + slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + slpc->rp0_freq); hmm, likely indent is wrong, did you run checkpatch.pl ? Fixed. +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. @@ -423,6 +497,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) { struct drm_i915_private *i915 = slpc_to_i915(slpc); struct slpc_shared_data *data; + u32 rp_state_cap; int ret; GEM_BUG_ON(!slpc->vma); @@ -460,6 +535,28 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
Re: [PATCH 07/15] drm/i915/guc/slpc: Remove BUG_ON in guc_submission_disable
On 7/27/2021 5:20 PM, Matthew Brost wrote: On Mon, Jul 26, 2021 at 12:07:52PM -0700, Vinay Belgaumkar wrote: The assumption when it was added was there would be no wakerefs held. However, if we fail to enable SLPC, we will still be holding a wakeref. So this is if intel_guc_slpc_enable() fails, right? Not seeing where the wakeref is taken. It also seems wrong not to drop the wakeref before calling intel_guc_submission_disable, hence the GEM_BUG_ON in this function. Can you explain this bit more? I should change the desc a little. The BUG_ON assumed GT would not be awake i.e. at shutdown, and there would be 0 GT_PM references. However, this slpc_enable is in gt_resume path (gt_init_hw calls uc_init_hw). Here, gt_pm_get reference is held, so it will result in BUG_ON when submission_disable is called. Thanks, Vinay. Matt Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b6338742a594..48cbd800ca54 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2523,10 +2523,6 @@ void intel_guc_submission_enable(struct intel_guc *guc) void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - GEM_BUG_ON(gt->awake); /* GT should be parked first */ - /* Note: By the time we're here, GuC may have already been reset */ } -- 2.25.0
Re: [PATCH 11/15] drm/i915/guc/slpc: Enable ARAT timer interrupt
On 7/27/2021 8:40 AM, Matthew Brost wrote: On Mon, Jul 26, 2021 at 12:07:56PM -0700, Vinay Belgaumkar wrote: This interrupt is enabled during RPS initialization, and now needs to be done by SLPC code. It allows ARAT timer expiry interrupts to get forwarded to GuC. Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 16 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 995d3d4807a3..c79dba60b2e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -392,6 +392,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) +{ + u32 pm_intrmsk_mbz = 0; + + /* Allow GuC to receive ARAT timer expiry event. I've been berated for using comments like this by other engineers. I personally don't care at all (nor does checkpatch) but if you want to avoid the wrath of others I'd change this to what I have below: /* * Allow GuC to receive ARAT timer expiry event. * This interrupt register is setup by RPS code * when host based Turbo is enabled. */ Same goes for comment below of same style. Either way, patch looks good to me. With that: Reviewed-by: Matthew Brost Fixed. Thanks, Vinay. +* This interrupt register is setup by RPS code +* when host based Turbo is enabled. +*/ + pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + + intel_uncore_rmw(gt->uncore, + GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. 
@@ -439,6 +453,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_query_task_state(slpc); + intel_guc_pm_intrmsk_enable(&i915->gt); + /* min and max frequency limits being used by SLPC */ drm_info(&i915->drm, "SLPC min freq: %u Mhz, max is %u Mhz\n", slpc_decode_min_freq(slpc), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index d133c8020c16..f128143cc1d8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,7 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +struct intel_gt; struct drm_printer; static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) @@ -35,5 +36,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p); +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index b98c14f8c229..9238bc076605 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -652,6 +652,7 @@ void intel_uc_suspend(struct intel_uc *uc) static int __uc_resume(struct intel_uc *uc, bool enable_communication) { struct intel_guc *guc = &uc->guc; + struct intel_gt *gt = guc_to_gt(guc); int err; if (!intel_guc_is_fw_running(guc)) @@ -663,6 +664,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) if (enable_communication) guc_enable_communication(guc); + /* If we are only resuming GuC communication but not reloading +* GuC, we need to ensure the ARAT timer interrupt is enabled +* again. In case of GuC reload, it is enabled during SLPC enable. 
+*/ + if (enable_communication && intel_uc_uses_guc_slpc(uc)) + intel_guc_pm_intrmsk_enable(gt); + err = intel_guc_resume(guc); if (err) { DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); -- 2.25.0
Re: [PATCH 10/15] drm/i915/guc/slpc: Add debugfs for SLPC info
On 7/27/2021 8:37 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: This prints out relevant SLPC info from the SLPC shared structure. We will send a h2g message which forces SLPC to update the s/h2g/H2G ok. shared data structure with latest information before reading it. v2: Address review comments (Michal W) v3: Remove unnecessary tasks from slpc_info (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 22 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 29 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 4 ++- 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 72ddfff42f7d..3244e54b1337 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -12,6 +12,7 @@ #include "gt/uc/intel_guc_ct.h" #include "gt/uc/intel_guc_ads.h" #include "gt/uc/intel_guc_submission.h" +#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -50,11 +51,32 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data) } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); +static int guc_slpc_info_show(struct seq_file *m, void *unused) +{ + struct intel_guc *guc = m->private; + struct intel_guc_slpc *slpc = >slpc; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_slpc_is_used(guc)) + return -ENODEV; + + return intel_guc_slpc_info(slpc, ); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); + +static bool intel_eval_slpc_support(void *data) +{ + struct intel_guc *guc = (struct intel_guc *)data; + + return intel_guc_slpc_is_used(guc); +} + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", _info_fops, NULL }, { "guc_registered_contexts", _registered_contexts_fops, NULL }, 
+ { "guc_slpc_info", _slpc_info_fops, _eval_slpc_support}, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index c653bba3b5eb..995d3d4807a3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -448,6 +448,35 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p) nit: intel_guc_slpc_print_info ? ok. +{ + struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915; use slpc_to_i915() ok. + struct slpc_shared_data *data = slpc->vaddr; + struct slpc_task_state_data *slpc_tasks; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!slpc->vma); + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_query_task_state(slpc); + + if (!ret) { + slpc_tasks = >task_state_data; + + drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); + drm_printf(p, "\tGTPERF task active: %s\n", + yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tMax freq: %u MHz\n", + slpc_decode_max_freq(slpc)); + drm_printf(p, "\tMin freq: %u MHz\n", + slpc_decode_min_freq(slpc)); + } + } + + return ret; +} + void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { if (!slpc->vma) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 92d7afd44f07..d133c8020c16 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,8 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +struct drm_printer; + static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) { return guc->slpc_supported; @@ -25,7 +27,6 @@ static inline bool intel_guc_slpc_is_used(struct intel_guc *guc) } void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); - this should be fixed in earlier patch with all that 
fixed, Done. Thanks, Vinay. Reviewed-by: Michal Wajdeczko int intel_guc_slpc_init(struct intel_guc_slpc *slpc); int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); @@ -33,5 +34,6 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc,
Re: [Intel-gfx] [PATCH 09/15] drm/i915/guc/slpc: Add get max/min freq hooks
On 7/27/2021 8:32 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add helpers to read the min/max frequency being used by SLPC. This is done by sending an H2G command which forces SLPC to update the shared data struct which can then be read. These helpers will be used in a sysfs patch later on. v2: Address review comments (Michal W) v3: Return err in case of query failure (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 54 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 63656640189c..c653bba3b5eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -306,6 +306,33 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold max frequency (MHz) + * + * This function will invoke GuC SLPC action to read the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_max_freq(slpc); + } + + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. @@ -338,6 +365,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. 
+ * @val: pointer to val which will hold min frequency (MHz) + * + * This function will invoke GuC SLPC action to read the min frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + intel_wakeref_t wakeref; + struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915; use slpc_to_i915() and in this order: struct drm_i915_private *i915 = slpc_to_i915(slpc); intel_wakeref_t wakeref; int ret = 0; with that fixed, Reviewed-by: Michal Wajdeczko done. Thanks, vinay. + int ret = 0; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_min_freq(slpc); + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index e594510497ec..92d7afd44f07 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -31,5 +31,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); #endif
Re: [PATCH 03/15] drm/i915/guc/slpc: Gate Host RPS when SLPC is enabled
On 7/27/2021 3:44 PM, Matthew Brost wrote: On Mon, Jul 26, 2021 at 12:07:48PM -0700, Vinay Belgaumkar wrote: Also ensure uc_init is called before we initialize RPS so that we can check for SLPC support. We do not need to enable up/down interrupts when SLPC is enabled. However, we still need the ARAT interrupt, which will be enabled separately later. Do we not need a check for rps_uses_slpc in intel_rps_enable? I guessing there is a reason why we don't but can't seem to figure that out. Yeah, it's due to this check in there - if (rps->max_freq <= rps->min_freq) /* leave disabled, no room for dynamic reclocking */; With slpc, rps->max_freq and rps->min freq remain uninitialized, so the if condition just falls through and returns with this- if (!enabled) return; Thanks, Vinay. Matt Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 20 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a64aa43f7cd9..04dd69bcf6cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -41,8 +41,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); - intel_rps_init_early(>rps); intel_uc_init_early(>uc); + intel_rps_init_early(>rps); } int intel_gt_probe_lmem(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0c8e7f2b06f0..e858eeb2c59d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static bool rps_uses_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return intel_uc_uses_guc_slpc(>uc); +} + static u32 rps_pm_sanitize_mask(struct 
intel_rps *rps, u32 mask) { return mask & ~rps->pm_intrmsk_mbz; @@ -167,6 +174,8 @@ static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); + GEM_BUG_ON(rps_uses_slpc(rps)); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", rps->pm_events, rps_pm_mask(rps, rps->last_freq)); @@ -771,6 +780,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) struct drm_i915_private *i915 = rps_to_i915(rps); u32 swreq; + GEM_BUG_ON(rps_uses_slpc(rps)); + if (GRAPHICS_VER(i915) >= 9) swreq = GEN9_FREQUENCY(val); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) @@ -861,6 +872,9 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + if (!intel_rps_is_enabled(rps)) + return; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); if (!intel_rps_clear_active(rps)) @@ -1829,6 +1843,9 @@ void intel_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (rps_uses_slpc(rps)) + return; + if (IS_CHERRYVIEW(i915)) chv_rps_init(rps); else if (IS_VALLEYVIEW(i915)) @@ -1885,6 +1902,9 @@ void intel_rps_init(struct intel_rps *rps) void intel_rps_sanitize(struct intel_rps *rps) { + if (rps_uses_slpc(rps)) + return; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } -- 2.25.0
Re: [Intel-gfx] [PATCH 08/15] drm/i915/guc/slpc: Add methods to set min/max frequency
On 7/27/2021 8:24 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add param set h2g helpers to set the min and max frequencies s/h2g/H2G for use by SLPC. v2: Address review comments (Michal W) v3: Check for positive error code (Michal W) Signed-off-by: Sundaresan Sujaritha Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 89 - drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index f5808d2acbca..63656640189c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -109,6 +109,21 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); @@ -118,7 +133,7 @@ static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) { u32 request[] = { INTEL_GUC_ACTION_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), this should be fixed in original patch ok. 
offset, 0, }; @@ -146,6 +161,15 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) return ret; } +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param(guc, id, value); +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { const char *str = NULL; @@ -251,6 +275,69 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set max frequency unslice returned (%pe)\n", ERR_PTR(ret)); maybe generic error reporting could be moved to slpc_set_param() ? + /* Return standardized err code for sysfs */ + ret = -EIO; at this point we don't know if this function is for sysfs only I would sanitize error in "store" hook if really needed ok. ssize_t slpc_max_freq_store(... const char *buf, size_t count) { ... err = intel_guc_slpc_set_max_freq(slpc, val); return err ? -EIO : count; } + } + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. 
+ */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret; + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set min frequency for unslice returned (%pe)\n", ERR_PTR(ret)); + /* Return standardized err code for sysfs */ + ret = -EIO; + } + } same here Fixed. Thanks, Vinay. Michal + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc:
Re: [PATCH 14/15] drm/i915/guc/slpc: Add SLPC selftest
On 7/27/2021 12:16 PM, Matthew Brost wrote: On Mon, Jul 26, 2021 at 12:07:59PM -0700, Vinay Belgaumkar wrote: Tests that exercise the SLPC get/set frequency interfaces. Clamp_max will set max frequency to multiple levels and check that SLPC requests frequency lower than or equal to it. Clamp_min will set min frequency to different levels and check if SLPC requests are higher or equal to those levels. v2: Address review comments (Michal W) v3: Checkpatch() corrections Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/intel_rps.c | 1 + drivers/gpu/drm/i915/gt/selftest_slpc.c | 311 ++ drivers/gpu/drm/i915/gt/selftest_slpc.h | 12 + .../drm/i915/selftests/i915_live_selftests.h | 1 + 4 files changed, 325 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.c create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.h diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 48d4147165a9..6237332835fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2318,4 +2318,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_rps.c" +#include "selftest_slpc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c new file mode 100644 index ..5018f686686f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "selftest_slpc.h" + +#define NUM_STEPS 5 +#define H2G_DELAY 5 +#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1) + +static int set_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_min_freq(slpc, freq); + if (ret) + pr_err("Could not set min frequency to [%u]\n", freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int set_max_freq(struct intel_guc_slpc *slpc, 
u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_max_freq(slpc, freq); + if (ret) + pr_err("Could not set maximum frequency [%u]\n", + freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +int live_slpc_clamp_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = >gt; + struct intel_guc_slpc *slpc = >uc.guc.slpc; + struct intel_rps *rps = >rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + u32 slpc_min_freq, slpc_max_freq; + int err = 0; + + if (!intel_uc_uses_guc_slpc(>uc)) + return 0; + + if (igt_spinner_init(, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, _max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 step, min_freq, req_freq; + u32 act_freq, max_act_freq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from min to max in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; + min_freq += step) { + err = set_min_freq(slpc, min_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(); +
Re: [Intel-gfx] [PATCH 06/15] drm/i915/guc/slpc: Enable SLPC and add related H2G events
On 7/27/2021 1:19 PM, Michal Wajdeczko wrote: On 27.07.2021 22:00, Belgaumkar, Vinay wrote: On 7/27/2021 8:12 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add methods for interacting with GuC for enabling SLPC. Enable SLPC after GuC submission has been established. GuC load will fail if SLPC cannot be successfully initialized. Add various helper methods to set/unset the parameters for SLPC. They can be set using H2G calls or directly setting bits in the shared data structure. v2: Address several review comments, add new helpers for decoding the SLPC min/max frequencies. Use masks instead of hardcoded constants. (Michal W) v3: Split global_state_to_string function, and check for positive non-zero return value from intel_guc_send() (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 237 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 + 3 files changed, 247 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index bae4e33db0f8..f5808d2acbca 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) guc->slpc_selected = __guc_slpc_selected(guc); } +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* + * When the flag bit is set, corresponding value will be read + * and applied by slpc. s/slpc/SLPC ok. + */ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Enabling a param involves setting the enable_id + * to 1 and disable_id to 0. 
+ */ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Disabling a param involves setting the enable_id + * to 0 and disable_id to 1. + */ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + static int slpc_shared_data_init(struct intel_guc_slpc *slpc) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -63,6 +97,129 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); extra ( ) not needed ok. +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma); just "offset" ? or maybe pass directly in call below ? ok. + int ret; + + ret = guc_action_slpc_query(guc, shared_data_gtt_offset); + if (ret) + drm_err(>drm, "Query task state data returned (%pe)\n", "Failed to query task state (%pe)\n" ? ok. 
+ ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static const char *slpc_global_state_to_string(enum slpc_global_state state) +{ + const char *str = NULL; + + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + str = "not running"; + break; + case SLPC_GLOBAL_STATE_INITIALIZING: + str = "initializing"; + break; + case SLPC_GLOBAL_STATE_RESETTING: + str = "resetting"; + break; + case SLPC_GLOBAL_STATE_RUNNING: + str = "running"; + break; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + str = "shutting down"; + break; + case SLPC_GLOBAL_STATE_ERROR: + str = "error"; + break; + default: + str = "unknown"; nit: yo
Re: [Intel-gfx] [PATCH 06/15] drm/i915/guc/slpc: Enable SLPC and add related H2G events
On 7/27/2021 8:12 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add methods for interacting with GuC for enabling SLPC. Enable SLPC after GuC submission has been established. GuC load will fail if SLPC cannot be successfully initialized. Add various helper methods to set/unset the parameters for SLPC. They can be set using H2G calls or directly setting bits in the shared data structure. v2: Address several review comments, add new helpers for decoding the SLPC min/max frequencies. Use masks instead of hardcoded constants. (Michal W) v3: Split global_state_to_string function, and check for positive non-zero return value from intel_guc_send() (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 237 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 + 3 files changed, 247 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index bae4e33db0f8..f5808d2acbca 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) guc->slpc_selected = __guc_slpc_selected(guc); } +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* +* When the flag bit is set, corresponding value will be read +* and applied by slpc. s/slpc/SLPC ok. +*/ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* +* Enabling a param involves setting the enable_id +* to 1 and disable_id to 0. 
+*/ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* +* Disabling a param involves setting the enable_id +* to 0 and disable_id to 1. +*/ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + static int slpc_shared_data_init(struct intel_guc_slpc *slpc) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -63,6 +97,129 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); extra ( ) not needed ok. +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma); just "offset" ? or maybe pass directly in call below ? ok. + int ret; + + ret = guc_action_slpc_query(guc, shared_data_gtt_offset); + if (ret) + drm_err(>drm, "Query task state data returned (%pe)\n", "Failed to query task state (%pe)\n" ? ok. 
+ ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static const char *slpc_global_state_to_string(enum slpc_global_state state) +{ + const char *str = NULL; + + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + str = "not running"; + break; + case SLPC_GLOBAL_STATE_INITIALIZING: + str = "initializing"; + break; + case SLPC_GLOBAL_STATE_RESETTING: + str = "resetting"; + break; + case SLPC_GLOBAL_STATE_RUNNING: + str = "running"; + break; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + str = "shutting down"; + break; + case SLPC_GLOBAL_STATE_ERROR: + str = "error"; +
Re: [PATCH 04/15] drm/i915/guc/slpc: Adding SLPC communication interfaces
On 7/27/2021 6:59 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add constants and params that are needed to configure SLPC. v2: Add a new abi header for SLPC. Replace bitfields with genmasks. Address other comments from Michal W. v3: Add slpc H2G format in abi, other review commments (Michal W) v4: Update status bits according to latest spec Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 - .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 235 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 7 + 4 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index d832c8f11c11..ca538e5de940 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -135,7 +135,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, - INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h new file mode 100644 index ..70b300d4a536 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * ++--+--+ + * | CL | Bytes| Description | + * ++==+==+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +--+--+ + * || 
4-7 | GLOBAL STATE | + * | +--+--+ + * || 8-11 | DISPLAY DATA ADDRESS | + * | +--+--+ + * || 12:63| PADDING | + * ++--+--+ + * || 0:63 | PADDING(PLATFORM INFO) | + * ++--+--+ + * | 3 | 0-3 | TASK STATE DATA | + * + +--+--+ + * || 4:63 | PADDING | + * ++--+--+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * ++--+--+ + * || | PADDING + EXTRA RESERVED PAGE | + * ++--+--+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. + * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size
Re: [PATCH 02/15] drm/i915/guc/slpc: Initial definitions for SLPC
On 7/27/2021 6:43 AM, Michal Wajdeczko wrote: On 26.07.2021 21:07, Vinay Belgaumkar wrote: Add macros to check for SLPC support. This feature is currently supported for Gen12+ and enabled whenever GuC submission is enabled/selected. Include templates for SLPC init/fini and enable. v2: Move SLPC helper functions to intel_guc_slpc.c/.h. Define basic template for SLPC structure in intel_guc_slpc_types.h. Fix copyright (Michal W) v3: Review comments (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: Daniele Ceraolo Spurio drm/i915/guc/slpc: Lay out slpc init/enable/fini Declare init/fini and enable function templates. v2: Rebase Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 2 + drivers/gpu/drm/i915/gt/uc/intel_guc.h| 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 45 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 33 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 12 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 ++- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 + 8 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index ab7679957623..d8eac4468df9 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 979128e28372..39bc3c16057b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" +#include "intel_guc_slpc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -157,6 +158,7 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_ct_init_early(>ct); intel_guc_log_init_early(>log); intel_guc_submission_init_early(guc); + intel_guc_slpc_init_early(>slpc); mutex_init(>send_mutex); spin_lock_init(>irq_lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a9547069ee7e..15ad2eaee473 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -15,6 +15,7 @@ #include "intel_guc_ct.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" +#include "intel_guc_slpc_types.h" #include "intel_uc_fw.h" #include "i915_utils.h" #include "i915_vma.h" @@ -30,6 +31,7 @@ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; struct intel_guc_ct ct; + struct intel_guc_slpc slpc; /* Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; @@ -57,6 +59,8 @@ struct intel_guc { bool submission_supported; bool submission_selected; + bool slpc_supported; + bool slpc_selected; (I know that you were following existing code, but we might do better and since you have to resend it anyway without patch 1/15 ...) as we have here: + struct intel_guc_slpc slpc; then maybe both supported/selected shall be moved there as: struct intel_guc_slpc { + bool supported; + bool selected; }; so the struct wont be empty any more, with that fixed: Reviewed-by: Michal Wajdeczko Ok, done, Thanks for the review, Vinay. 
struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c new file mode 100644 index ..7275100ef8f8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_guc_slpc.h" +#include "gt/intel_gt.h" + +static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) +{ + return container_of(slpc, struct intel_guc, slpc); +} + +static bool __detect_slpc_supported(struct intel_guc *guc) +{ + /* GuC SLPC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_slpc_selected(struct intel_guc *guc) +{ + if
Re: [PATCH 15/15] drm/i915/guc/rc: Setup and enable GUCRC feature
On 7/27/2021 8:37 AM, Matt Roper wrote: On Mon, Jul 26, 2021 at 12:08:00PM -0700, Vinay Belgaumkar wrote: This feature hands over the control of HW RC6 to the GuC. GuC decides when to put HW into RC6 based on it's internal busyness algorithms. GUCRC needs GuC submission to be enabled, and only supported on Gen12+ for now. When GUCRC is enabled, do not set HW RC6. Use a H2G message to tell GuC to enable GUCRC. When disabling RC6, tell GuC to revert RC6 control back to KMD. v2: Address comments (Michal W) Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_rc6.c | 22 +++-- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 80 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h | 31 +++ drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 + 8 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d8eac4468df9..3fc17f20d88e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 259d7eb4e165..299fcf10b04b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -98,11 +98,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); Do steps 2b and 2c above this still apply to gucrc? 
Are those still controlling the behavior of gucrc or does the GuC firmware just overwrite them with its own values? If they're still impacting the behavior when gucrc is enabled, is there any updated guidance on how the values should be set? It seems that there isn't any guidance in the bspec for the last several platforms, so we've pretty much been re-using old values without knowing if there's additional adjustment that should be done for the newer platforms. If the tuning values the driver sets get ignored/overwritten during GuC operation, maybe we should add a new gucrc_rc6_enable() that gets used instead of gen11_rc6_enable() and drops the unnecessary steps to help clarify what's truly important? Yeah, 2b does get overwritten by guc, but we still need 2c. - /* 3a: Enable RC6 */ - rc6->ctl_enable = - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1); + /* 3a: Enable RC6 +* +* With GUCRC, we do not enable bit 31 of RC_CTL, +* thus allowing GuC to control RC6 entry/exit fully instead. +* We will not set the HW ENABLE and EI bits +*/ + if (!intel_guc_rc_enable(>uc.guc)) + rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; + else + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); pg_enable = GEN9_RENDER_PG_ENABLE | We should probably clarify in the commit message that gucrc doesn't cover powergating and leaves that under driver control. Maybe we should even pull this out into its own function rather than leaving it in the "rc6 enable" function since it really is its own thing? I have a note in the summary patch about this, will pull it into this patch header as well. There is already a separate effort underway from Suja to decouple RC6 and coarse power gate enabling. Might become more streamlined after that. For now, I can have an if check around 2b so that there is more clarity? Thanks, Vinay. 
Matt @@ -513,6 +521,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + + /* Take control of RC6 back from GuC */ + intel_guc_rc_disable(&gt->uc.guc); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index ca538e5de940..8ff58aff 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++
Re: [PATCH 14/14] drm/i915/guc/rc: Setup and enable GUCRC feature
On 7/21/2021 11:21 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: This feature hands over the control of HW RC6 to the GuC. GuC decides when to put HW into RC6 based on it's internal busyness algorithms. GUCRC needs GuC submission to be enabled, and only supported on Gen12+ for now. When GUCRC is enabled, do not set HW RC6. Use a H2G message to tell GuC to enable GUCRC. When disabling RC6, tell GuC to revert RC6 control back to KMD. v2: Address comments (Michal W) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_rc6.c | 22 +++-- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 81 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h | 31 +++ drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 + 8 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d8eac4468df9..3fc17f20d88e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 259d7eb4e165..299fcf10b04b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -98,11 +98,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); - /* 3a: Enable RC6 */ - rc6->ctl_enable = - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1); + /* 3a: 
Enable RC6 +* +* With GUCRC, we do not enable bit 31 of RC_CTL, +* thus allowing GuC to control RC6 entry/exit fully instead. +* We will not set the HW ENABLE and EI bits +*/ + if (!intel_guc_rc_enable(>uc.guc)) + rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; + else + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); pg_enable = GEN9_RENDER_PG_ENABLE | @@ -513,6 +521,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + + /* Take control of RC6 back from GuC */ + intel_guc_rc_disable(>uc.guc); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index d832c8f11c11..5f1c82f35d97 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -136,6 +136,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, @@ -146,6 +147,11 @@ enum intel_guc_action { INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_rc_options { + INTEL_GUCRC_HOST_CONTROL, + INTEL_GUCRC_FIRMWARE_CONTROL, +}; + enum intel_guc_preempt_options { INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 686cb978662d..e474f554b17a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -159,6 +159,7 @@ void intel_guc_init_early(struct intel_guc *guc) 
intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); intel_guc_slpc_init_early(&guc->slpc); + intel_guc_rc_init_early(guc); mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8cecfad9d7b1..dcac31098687 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++
Re: [PATCH 12/14] drm/i915/guc/slpc: Sysfs hooks for SLPC
On 7/21/2021 11:13 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Update the get/set min/max freq hooks to work for SLPC case as well. Consolidate helpers for requested/min/max frequency get/set to intel_rps where the proper action can be taken depending on whether slpc is enabled. s/slpc/SLPC ok. v2: Add wrappers for getting rp0/1/n frequencies, update softlimits in set min/max slpc functions. Also check for boundary conditions before setting them. v3: Address review comments (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Tvrtko Ursulin Signed-off-by: Sujaritha Sundaresan --- drivers/gpu/drm/i915/gt/intel_rps.c | 165 drivers/gpu/drm/i915/gt/intel_rps.h | 11 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 ++ drivers/gpu/drm/i915/i915_pmu.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/i915_sysfs.c | 77 ++--- 6 files changed, 207 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e858eeb2c59d..48d4147165a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return >uc.guc.slpc; +} + static bool rps_uses_slpc(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); @@ -1960,6 +1967,164 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_punit_req(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + return intel_uncore_read(uncore, GEN6_RPNSWREQ); +} + +u32 intel_rps_get_req(struct intel_rps *rps, u32 pureq) +{ + u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + + return req; +} + +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(rps, 
intel_rps_read_punit_req(rps)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + +u32 intel_rps_get_max_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->max_freq_softlimit; + else + return intel_gpu_freq(rps, rps->max_freq_softlimit); +} + +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp0_freq; + else + return intel_gpu_freq(rps, rps->rp0_freq); +} + +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp1_freq; + else + return intel_gpu_freq(rps, rps->rp1_freq); +} + +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq; + else + return intel_gpu_freq(rps, rps->min_freq); +} + +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + int ret = 0; + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_max_freq(slpc, val); + + mutex_lock(>lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + if (val > rps->rp0_freq) + drm_dbg(>drm, "User requested overclocking to %d\n", + intel_gpu_freq(rps, val)); + + rps->max_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* +* We still need *_set_rps to process the new max_delay and +* update the interrupt limits and PMINTRMSK even though +* frequency 
request may be unchanged. +*/ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +u32 intel_rps_get_min_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq_softlimit; +
Re: [PATCH 11/14] drm/i915/guc/slpc: Cache platform frequency limits
On 7/21/2021 11:09 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Cache rp0, rp1 and rpn platform limits into SLPC structure for range checking while setting min/max frequencies. Also add "soft" limits which keep track of frequency changes made from userland. These are initially set to platform min and max. v2: Address review comments (Michal W) Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 98 +++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 9 ++ drivers/gpu/drm/i915/i915_reg.h | 3 + 3 files changed, 110 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 8796a8929d89..134c57ca10b7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + return err; } @@ -121,6 +124,19 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return intel_guc_send(guc, request, ARRAY_SIZE(request)); } +static int guc_action_slpc_unset_param(struct intel_guc *guc, + u8 id) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); @@ -164,6 +180,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return guc_action_slpc_set_param(guc, id, value); } +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) likely can fit into one line done. 
+{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static const char *slpc_state_string(struct intel_guc_slpc *slpc) { const char *str = NULL; @@ -388,6 +414,55 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); } +static int intel_guc_slpc_set_softlimits(struct intel_guc_slpc *slpc) +{ + int ret = 0; + + /* Softlimits are initially equivalent to platform limits +* unless they have deviated from defaults, in which case, +* we retain the values and set min/max accordingly. +*/ + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) + ret = intel_guc_slpc_set_min_freq(slpc, + slpc->min_freq_softlimit); + + return ret; +} + +static void intel_guc_slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + if (ignore) { + /* A failure here does not affect the algorithm in a fatal way */ + slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + ignore); + slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); hard to tell from mail client, but likely misalignment did you run checkpatch.pl ? Yup, ran it this time. + } +} + +static void intel_guc_slpc_use_fused_rp0(struct intel_guc_slpc *slpc) +{ + /* Force slpc to used platform rp0 */ + slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + slpc->rp0_freq); +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. 
@@ -405,6 +480,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) { struct drm_i915_private *i915 = slpc_to_i915(slpc); struct slpc_shared_data *data; + u32 rp_state_cap; int ret; GEM_BUG_ON(!slpc->vma); @@ -442,6 +518,28 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_decode_min_freq(slpc), slpc_decode_max_freq(slpc)); + rp_state_cap = intel_uncore_read(i915->gt.uncore, GEN6_RP_STATE_CAP);
Re: [PATCH 09/14] drm/i915/guc/slpc: Add debugfs for SLPC info
On 7/21/2021 11:05 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: This prints out relevant SLPC info from the SLPC shared structure. We will send a h2g message which forces SLPC to update the shared data structure with latest information before reading it. v2: Address review comments (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 23 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 40 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 4 +- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 72ddfff42f7d..46b22187927b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -12,6 +12,7 @@ #include "gt/uc/intel_guc_ct.h" #include "gt/uc/intel_guc_ads.h" #include "gt/uc/intel_guc_submission.h" +#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -50,11 +51,33 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data) } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); +static int guc_slpc_info_show(struct seq_file *m, void *unused) +{ + struct intel_guc *guc = m->private; + struct intel_guc_slpc *slpc = >slpc; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_slpc_is_used(guc)) + return -ENODEV; + + return intel_guc_slpc_info(slpc, ); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); + +bool intel_eval_slpc_support(void *data) +{ + struct intel_guc *guc; + + guc = (struct intel_guc *)data; struct intel_guc *guc = (struct intel_guc *)data; + return intel_guc_slpc_is_used(guc); +} + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", _info_fops, NULL }, { "guc_registered_contexts", _registered_contexts_fops, NULL }, + { 
"guc_slpc_info", _slpc_info_fops, _eval_slpc_support}, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index c1cf8d46e360..73379985c105 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -430,6 +430,46 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p) +{ + struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915; + struct slpc_shared_data *data; + struct slpc_task_state_data *slpc_tasks; + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + if (slpc_query_task_state(slpc)) + return -EIO; not sure if you can return directly from "with_rpm" Good point, checking err instead. + + slpc_tasks = >task_state_data; + + drm_printf(p, "SLPC state: %s\n", slpc_state_string(slpc)); + drm_printf(p, "\tgtperf task active: %s\n", + yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ACTIVE)); + drm_printf(p, "\tdcc task active: %s\n", + yesno(slpc_tasks->status & SLPC_DCC_TASK_ACTIVE)); + drm_printf(p, "\tin dcc: %s\n", + yesno(slpc_tasks->status & SLPC_IN_DCC)); + drm_printf(p, "\tfreq switch active: %s\n", + yesno(slpc_tasks->status & SLPC_FREQ_SWITCH_ACTIVE)); + drm_printf(p, "\tibc enabled: %s\n", + yesno(slpc_tasks->status & SLPC_IBC_ENABLED)); + drm_printf(p, "\tibc active: %s\n", + yesno(slpc_tasks->status & SLPC_IBC_ACTIVE)); + drm_printf(p, "\tpg1 enabled: %s\n", + yesno(slpc_tasks->status & SLPC_PG1_ENABLED)); + drm_printf(p, "\tpg1 active: %s\n", + yesno(slpc_tasks->status & SLPC_PG1_ACTIVE)); + drm_printf(p, "\tmax freq: %dMHz\n", + slpc_decode_max_freq(slpc)); + drm_printf(p, "\tmin freq: %dMHz\n", + slpc_decode_min_freq(slpc)); not sure what they are: DCC ? IBC ? PG1 ? Removed these for now, since we don't use/enable them yet anyways. and make sure to use %u for unsigned Ok. 
Thanks, Vinay. Michal + } + + return ret; +} + void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { if (!slpc->vma) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index
Re: [PATCH 08/14] drm/i915/guc/slpc: Add get max/min freq hooks
On 7/21/2021 11:00 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Add helpers to read the min/max frequency being used by SLPC. This is done by send a H2G command which forces SLPC to update the shared data struct which can then be read. add note that functions will be used later ok. v2: Address review comments (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 52 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b40c39ba4049..c1cf8d46e360 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -290,6 +290,32 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold max frequency (MHz) + * + * This function will invoke GuC SLPC action to read the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + intel_wakeref_t wakeref; + struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915; + int ret = 0; struct drm_i915_private *i915 = slpc_to_i915(slpc); intel_wakeref_t wakeref; int ret = 0; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Force GuC to update task data */ + slpc_query_task_state(slpc); what if this call fails ? saving error in ret. + + *val = slpc_decode_max_freq(slpc); + } + + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. 
@@ -322,6 +348,32 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold min frequency (MHz) + * + * This function will invoke GuC SLPC action to read the min frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + intel_wakeref_t wakeref; + struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915; + int ret = 0; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + /* Force GuC to update task data */ + slpc_query_task_state(slpc); same here Populated ret with return code. Thanks, Vinay. Michal + + *val = slpc_decode_min_freq(slpc); + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 3a1a7eaafc12..627c71a95777 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -32,5 +32,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); #endif
Re: [PATCH 07/14] drm/i915/guc/slpc: Add methods to set min/max frequency
On 7/21/2021 10:42 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Add param set h2g helpers to set the min and max frequencies for use by SLPC. v2: Address review comments (Michal W) Signed-off-by: Sundaresan Sujaritha Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 84 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 86 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 48db2a8f67d1..b40c39ba4049 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -109,6 +109,18 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); beware of possible non-zero data0 returned by guc_send() Ok, added -EPROTO check. +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); @@ -143,6 +155,15 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) return ret; } +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param(guc, id, value); +} + static const char *slpc_state_string(struct intel_guc_slpc *slpc) { const char *str = NULL; @@ -238,6 +259,69 @@ u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. 
+ * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret; + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; nit: move "ret" as last ok. + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set max frequency unslice returned (%pe)\n", ERR_PTR(ret)); + /* Return standardized err code for sysfs */ + ret = -EIO; maybe caller (hook in sysfs) can sanitize this error ? Caller will then need to check the error type - something like- if (err) { if (err != -EINVAL) return -EIO; } Seems cleaner to return specific error type from here instead. Anything other than -EINVAL or -EIO causes garbage in sysfs output. Thanks, Vinay. Michal + } + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret; + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(>runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + if (ret) { + drm_err(>drm, + "Set min frequency for unslice returned (%pe)\n", ERR_PTR(ret)); + /* Return standardized err code for sysfs */ + ret = -EIO; + } + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index f02249ff5f1b..3a1a7eaafc12 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -30,5 +30,7 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); int intel_guc_slpc_init(struct intel_guc_slpc *slpc); int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void
Re: [Intel-gfx] [PATCH 05/14] drm/i915/guc/slpc: Allocate, initialize and release SLPC
On 7/21/2021 10:26 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Allocate data structures for SLPC and functions for initializing on host side. v2: Address review comments (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 11 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 36 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 3 ++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index fcccb103a21a..686cb978662d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -337,6 +337,12 @@ int intel_guc_init(struct intel_guc *guc) goto err_ct; } + if (intel_guc_slpc_is_used(guc)) { + ret = intel_guc_slpc_init(>slpc); + if (ret) + goto err_submission; + } + /* now that everything is perma-pinned, initialize the parameters */ guc_init_params(guc); @@ -347,6 +353,8 @@ int intel_guc_init(struct intel_guc *guc) return 0; +err_submission: + intel_guc_submission_fini(guc); err_ct: intel_guc_ct_fini(>ct); err_ads: @@ -369,6 +377,9 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(gt->ggtt); + if (intel_guc_slpc_is_used(guc)) + intel_guc_slpc_fini(>slpc); + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index d9feb430ce35..a99d727b5bf0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -12,6 +12,16 @@ static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) return container_of(slpc, struct intel_guc, slpc); } +static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc) +{ + return guc_to_gt(slpc_to_guc(slpc)); +} + +static inline struct drm_i915_private 
*slpc_to_i915(struct intel_guc_slpc *slpc) +{ + return (slpc_to_gt(slpc))->i915; redundant ( ) ok. +} + static bool __detect_slpc_supported(struct intel_guc *guc) { /* GuC SLPC is unavailable for pre-Gen12 */ @@ -35,9 +45,29 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) guc->slpc_selected = __guc_slpc_selected(guc); } +static int slpc_shared_data_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + err = intel_guc_allocate_and_map_vma(guc, size, >vma, (void **)>vaddr); + if (unlikely(err)) { + drm_err(>drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + return err; +} + int intel_guc_slpc_init(struct intel_guc_slpc *slpc) { - return 0; + GEM_BUG_ON(slpc->vma); + + return slpc_shared_data_init(slpc); } /* @@ -60,4 +90,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { + if (!slpc->vma) + return; + + i915_vma_unpin_and_release(>vma, I915_VMA_RELEASE_MAP); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index c3b0ad7f0f93..f02249ff5f1b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -8,6 +8,7 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +#include "abi/guc_actions_slpc_abi.h" is this is needed here ? maybe abi.h could be included only in slcp.c ? ok, removed. 
static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index b85148265b1f..214a449e78f2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -7,9 +7,12 @@ #define _INTEL_GUC_SLPC_TYPES_H_ #include <linux/types.h> +#include "abi/guc_actions_slpc_abi.h" for below pointers you don't need this header(s) to be included ok. Thanks, Vinay. Michal struct intel_guc_slpc { + struct i915_vma *vma; + struct slpc_shared_data *vaddr; }; #endif
Re: [PATCH 06/14] drm/i915/guc/slpc: Enable SLPC and add related H2G events
On 7/21/2021 10:38 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Add methods for interacting with GuC for enabling SLPC. Enable SLPC after GuC submission has been established. GuC load will fail if SLPC cannot be successfully initialized. Add various helper methods to set/unset the parameters for SLPC. They can be set using H2G calls or directly setting bits in the shared data structure. This patch also removes the GEM_BUG_ON from guc_submission_disable(). The assumption when that was added was there would be no wakerefs when it would be called. However, if we fail to enable slpc, we will s/slpc/SLPC done. still be holding a wakeref. v2: Address several review comments, add new helpers for decoding the slpc min/max frequencies. Use masks instead of hardcoded constants. (Michal W) Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 206 ++ .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 - drivers/gpu/drm/i915/gt/uc/intel_uc.c | 10 + 4 files changed, 218 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index a99d727b5bf0..48db2a8f67d1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) guc->slpc_selected = __guc_slpc_selected(guc); } +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* +* When the flag bit is set, corresponding value will be read +* and applied by slpc. 
+*/ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* +* Enabling a param involves setting the enable_id +* to 1 and disable_id to 0. +*/ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* +* Disabling a param involves setting the enable_id +* to 0 and disable_id to 1. +*/ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + static int slpc_shared_data_init(struct intel_guc_slpc *slpc) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -63,6 +97,116 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc) return err; } +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING); +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + INTEL_GUC_ACTION_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_query(guc, shared_data_gtt_offset); + if (ret) + drm_err(&i915->drm, "Query task state data returned (%pe)\n", + ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static const char 
*slpc_state_string(struct intel_guc_slpc *slpc) +{ + const char *str = NULL; + u32 state = slpc_get_state(slpc); + + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + str = "not running"; + break; + case SLPC_GLOBAL_STATE_INITIALIZING: + str = "initializing"; + break; + case SLPC_GLOBAL_STATE_RESETTING: + str = "resetting"; + break; + case SLPC_GLOBAL_STATE_RUNNING: + str = "running"; + break; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + str = "shutting down"; + break; + case SLPC_GLOBAL_STATE_ERROR: + str =
Re: [PATCH 04/14] drm/i915/guc/slpc: Adding SLPC communication interfaces
On 7/21/2021 10:25 AM, Michal Wajdeczko wrote: On 21.07.2021 18:11, Vinay Belgaumkar wrote: Add constants and params that are needed to configure SLPC. v2: Add a new abi header for SLPC. Replace bitfields with genmasks. Address other comments from Michal W. Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha --- .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 201 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 + 3 files changed, 206 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h new file mode 100644 index ..05d809746b32 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include + +/** + * SLPC SHARED DATA STRUCTURE if you want to use kernel-doc, then add DOC: tag ok. + * + * +---+---+--+ + * | CL| Bytes | Description | + * +===+===+==+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +---+--+ + * | | 4-7 | GLOBAL STATE | + * | +---+--+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +---+--+ + * | | 12:63 | PADDING | + * +---+---+--+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +---+---+--+ + * | 3 | 0-3 | TASK STATE DATA | + * + +---+--+ + * | | 4:63 | PADDING | + * +---+---+--+ + * |4-21 0:1087| OVERRIDE PARAMS AND BIT FIELDS | ^ something didn't work here Fixed. + * +---+---+--+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +---+---+--+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. 
+ * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. + * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) +#define SLPC_EVENT(id, argc) ((u32)(id) << 8 | (argc)) +#define SLPC_EVENT_MAX_INPUT_ARGS 9 above two are likely used in H2G SLPC message that is not fully defined, maybe at the end of this file add separate section with H2G format, using plain C #defines, and move SLPC_EVENT helper macro to fwif.h ok. + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES-1)) / SLPC_CACHELINE_SIZE_BYTES)*SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER