Re: [PATCH 2/2] drm/i915/guc: Extend w/a 14019159160

2024-07-15 Thread Belgaumkar, Vinay



On 6/21/2024 5:46 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

There is a new part to an existing workaround, so enable that piece as
well.

v2: Extend even further.
v3: Drop DG2 as there are CI failures still to resolve. Also re-order
the parameters to a function to reduce excessive line wrapping.


LGTM,

Reviewed-by: Vinay Belgaumkar 



Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 18 +-
  2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 37ff539a6963d..0c709e6c15be7 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -107,6 +107,7 @@ enum {
  enum {
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE   = 
0x9001,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED   = 
0x9002,
+   GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 
0x9006,
  };
  
  #endif /* _ABI_GUC_KLVS_ABI_H */

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index f1fe5f9054538..46fabbfc775e0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -815,8 +815,7 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
  }
  
-static void guc_waklv_enable_simple(struct intel_guc *guc,

-   u32 klv_id, u32 *offset, u32 *remain)
+static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 
*remain, u32 klv_id)
  {
u32 size;
u32 klv_entry[] = {
@@ -850,19 +849,20 @@ static void guc_waklv_init(struct intel_guc *guc)
remain = guc_ads_waklv_size(guc);
  
  	/* Wa_14019159160 */

-   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
-   guc_waklv_enable_simple(guc,
-   GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE,
-   , );
+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+   guc_waklv_enable_simple(guc, , ,
+   GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
+   guc_waklv_enable_simple(guc, , ,
+   
GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
+   }
  
  	/* Wa_16021333562 */

if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
 IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
 IS_DG2(gt->i915)))
-   guc_waklv_enable_simple(guc,
-   
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
-   , );
+   guc_waklv_enable_simple(guc, , ,
+   
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
  
  	size = guc_ads_waklv_size(guc) - remain;

if (!size)


Re: [PATCH 1/2] drm/i915/arl: Enable Wa_14019159160 for ARL

2024-07-15 Thread Belgaumkar, Vinay



On 6/21/2024 5:46 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

The context switch out workaround also applies to ARL.

Signed-off-by: John Harrison 


LGTM,

Reviewed-by: Vinay Belgaumkar 


---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5e60a34692af8..097fc6bd1285e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -296,7 +296,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
  
  	/* Wa_16019325821 */

/* Wa_14019159160 */
-   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
flags |= GUC_WA_RCS_CCS_SWITCHOUT;
  
  	/*

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 7995f059f30df..f1fe5f9054538 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -850,7 +850,7 @@ static void guc_waklv_init(struct intel_guc *guc)
remain = guc_ads_waklv_size(guc);
  
  	/* Wa_14019159160 */

-   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
guc_waklv_enable_simple(guc,
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE,
, );


Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq

2024-02-28 Thread Belgaumkar, Vinay



On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote:


On 27/02/2024 23:51, Vinay Belgaumkar wrote:

Allow user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc 
submission

enabled platforms as they use SLPC for frequency management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: Sushma Venkatesh Reddy 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 15 +++--
  .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_rps.c   |  5 +
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 ++
  drivers/gpu/drm/i915/i915_getparam.c  | 12 +++
  include/uapi/drm/i915_drm.h   | 15 +
  10 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..0799cb0b2803 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

 struct i915_gem_proto_context *pc,
 struct drm_i915_gem_context_param *args)
  {
+    struct drm_i915_private *i915 = fpriv->i915;
  int ret = 0;
    switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
  break;
  +    case I915_CONTEXT_PARAM_LOW_LATENCY:
+    if (intel_uc_uses_guc_submission(_gt(i915)->uc))
+    pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
+    else
+    ret = -EINVAL;
+    break;
+
  case I915_CONTEXT_PARAM_RECOVERABLE:
  if (args->size)
  ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct 
intel_context *ce,
  if (sseu.slice_mask && !WARN_ON(ce->engine->class != 
RENDER_CLASS))

  ret = intel_context_reconfigure_sseu(ce, sseu);
  +    if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags))
+    set_bit(CONTEXT_LOW_LATENCY, >flags);


Does not need to be atomic so can use __set_bit as higher up in the 
function.

ok.



+
  return ret;
  }
  @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct 
drm_i915_private *i915,

  if (vm)
  ctx->vm = vm;
  +    ctx->user_flags = pc->user_flags;
+


Given how most ctx->something assignments are at the bottom of the 
function I would stick a comment here saying along the lines of 
"assign early for intel_context_set_gem called when creating engines".

ok.



mutex_init(>engines_mutex);
  if (pc->num_user_engines >= 0) {
  i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private 
*i915,

   * is no remap info, it will be a NOP. */
  ctx->remap_slice = ALL_L3_SLICES(i915);
  -    ctx->user_flags = pc->user_flags;
-
  for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
  ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
  diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..b6d97da63d1f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
  #define UCONTEXT_BANNABLE    2
  #define UCONTEXT_RECOVERABLE    3
  #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_LOW_LATENCY    5
    /**
   * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h

index 7eccbd70d89f..ed95a7b57cbb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ 

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-23 Thread Belgaumkar, Vinay



On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote:


On 22/02/2024 23:31, Belgaumkar, Vinay wrote:


On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more 
slowly.
We also disable waitboost for this context as that will interfere 
with

the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new 
param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc 
submission

enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? 
Upsides,

downsides? Are there any plans for per context?


Currently there's no extension on a high-level API
(Vulkan/OpenGL/OpenCL/etc.)
that would allow the application to hint for power/freq/latency, so
Mesa cannot
decide when to hint. Their solution was to use .drirc and make a
per-application
decision.

I would prefer a high level extension for a more granular and 
informative

decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle, yeah, it doesn't harm to have the option. I am just not 
sure how useful this intermediate step is with its lack of 
intra-process granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 
+++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 
+++

   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  struct i915_gem_proto_context *pc,
  struct drm_i915_gem_context_param *args)
   {
+    struct drm_i915_private *i915 = fpriv->i915;
   int ret = 0;
   switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
   break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
   case I915_CONTEXT_PARAM_RECOVERABLE:
   if (args->size)
   ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE    2
   #define UCONTEXT_RECOVERABLE    3
   #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set 
for
non-compute engines? Wondering if per intel_context is what we 
want instead.

(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that needs lower
latency with short burst execution. But the engine itself doesn't 
matter.

It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, 
and what would be pros and cons in terms of PnP. But in the case we 
end up allowing it on any engine, then at least userspace name 
shouldn't be compute. :)
Yes, one of the suggestions from Daniele was to have something along 
the lines of UCONTEXT_HIFREQ so we don't confuse it with the Compute 
Engine.

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-22 Thread Belgaumkar, Vinay



On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new 
param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc 
submission

enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high-level API
(Vulkan/OpenGL/OpenCL/etc.)
that would allow the application to hint for power/freq/latency, so
Mesa cannot
decide when to hint. Their solution was to use .drirc and make a
per-application
decision.

I would prefer a high level extension for a more granular and 
informative

decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle, yeah, it doesn't harm to have the option. I am just not 
sure how useful this intermediate step is with its lack of 
intra-process granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 
+++

   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  struct i915_gem_proto_context *pc,
  struct drm_i915_gem_context_param *args)
   {
+    struct drm_i915_private *i915 = fpriv->i915;
   int ret = 0;
   switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
   break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
   case I915_CONTEXT_PARAM_RECOVERABLE:
   if (args->size)
   ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE    2
   #define UCONTEXT_RECOVERABLE    3
   #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is what we want 
instead.

(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that needs lower
latency with short burst execution. But the engine itself doesn't 
matter.

It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, and 
what would be pros and cons in terms of PnP. But in the case we end up 
allowing it on any engine, then at least userspace name shouldn't be 
compute. :)
Yes, one of the suggestions from Daniele was to have something along the 
lines of UCONTEXT_HIFREQ so we don't confuse it with the Compute Engine.


Or if we decide to call it compute and only apply to compute engines, 
then I would strongly 

Re: [PATCH] drm/i915/mtl: Wake GT before sending H2G message

2024-01-18 Thread Belgaumkar, Vinay



On 1/18/2024 3:50 PM, Matt Roper wrote:

On Thu, Jan 18, 2024 at 03:17:28PM -0800, Vinay Belgaumkar wrote:

Instead of waiting until the interrupt reaches GuC, we can grab a
forcewake while triggering the H2G interrupt. GEN11_GUC_HOST_INTERRUPT
is inside an "always on" domain with respect to RC6. However, there

A bit of a nitpick, but technically "always on" is a description of GT
register ranges that never get powered down.  GEN11_GUC_HOST_INTERRUPT
isn't inside the GT at all, but rather is an sgunit register and thus
isn't affected by forcewake.  This is just a special case where the
sgunit register forwards a message back to the GT's GuC, and the
workaround wants us to make sure the GT is awake before that message
gets there.

True, can modify the description to reflect this.



could be some delays when platform is entering/exiting some higher
level platform sleep states and a H2G is triggered. A forcewake
ensures those sleep states have been fully exited and further
processing occurs as expected.

Based on this description, is adding implicit forcewake to this register
really enough?  Implicit forcewake powers up before a read/write, but
also allows it to power back down as soon as the MMIO operation is
complete.  If the GuC is a bit slow to notice the interrupt, then we
could wind up with a sequence like

  - Driver grabs forcewake and GT powers up
  - Driver writes 0x1901f0 to trigger GuC interrupt
  - Driver releases forcewake and GT powers down
  - GuC notices interrupt (or maybe fails to notice it because the GT
powered down before it had a chance to process it?)

which I'm guessing isn't actually going to satisfy this workaround.  Do
we actually need to keep the GT awake not just through the register
operation, but also through the GuC's processing of the interrupt?  If
so, then we probably want to do an explicit forcewake get/put to ensure
the hardware stays powered up long enough.


The issue being addressed here is not GT entering C6, but the higher 
platform sleep states. Once we force wake GT while writing to the H2G 
register, that should bring us out of sleep. After clearing the 
forcewake (which would happen after the write for 0x1901f0 goes 
through), we still have C6 hysteresis and the hysteresis counters for 
the higher platform sleep states which should give GuC enough time to 
process the interrupt before we enter C6 and then subsequently these 
higher sleep states.


Thanks,

Vinay.




Matt


This will have an official WA soon so adding a FIXME in the comments.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/intel_uncore.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index dfefad5a5fec..121458a31886 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1800,7 +1800,10 @@ static const struct intel_forcewake_range 
__mtl_fw_ranges[] = {
GEN_FW_RANGE(0x24000, 0x2, 0), /*
0x24000 - 0x2407f: always on
0x24080 - 0x2: reserved */
-   GEN_FW_RANGE(0x3, 0x3, FORCEWAKE_GT)
+   GEN_FW_RANGE(0x3, 0x3, FORCEWAKE_GT),
+   GEN_FW_RANGE(0x4, 0x1901ec, 0),
+   GEN_FW_RANGE(0x1901f0, 0x1901f0, FORCEWAKE_GT)
+   /* FIXME: WA to wake GT while triggering H2G */
  };
  
  /*

--
2.38.1



Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: Enable Wa_16019325821

2023-12-13 Thread Belgaumkar, Vinay


On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote:

From: John Harrison

Some platforms require holding RCS context switches until CCS is idle
(the reverse w/a of Wa_14014475959). Some platforms require both
versions.

Signed-off-by: John Harrison
---
  drivers/gpu/drm/i915/gt/gen8_engine_cs.c  | 19 +++
  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  7 ---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  3 ++-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 ++-
  5 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 86a04afff64b3..9cccd60a5c41d 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -743,21 +743,23 @@ static u32 *gen12_emit_preempt_busywait(struct 
i915_request *rq, u32 *cs)
  }
  
  /* Wa_14014475959:dg2 */

-#define CCS_SEMAPHORE_PPHWSP_OFFSET0x540
-static u32 ccs_semaphore_offset(struct i915_request *rq)
+/* Wa_16019325821 */
+#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
  {
return i915_ggtt_offset(rq->context->state) +
-   (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+   (LRC_PPHWSP_PN * PAGE_SIZE) + 
HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
  }
  
  /* Wa_14014475959:dg2 */

-static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+/* Wa_16019325821 */
+static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
  {
int i;
  
  	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |

MI_ATOMIC_MOVE;
-   *cs++ = ccs_semaphore_offset(rq);
+   *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;
  
@@ -773,7 +775,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)

MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
-   *cs++ = ccs_semaphore_offset(rq);
+   *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
  
  	return cs;

@@ -790,8 +792,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, 
u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);
  
  	/* Wa_14014475959:dg2 */

-   if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
-   cs = ccs_emit_wa_busywait(rq, cs);
+   /* Wa_16019325821 */
+   if (intel_engine_uses_wa_hold_switchout(rq->engine))
+   cs = hold_switchout_emit_wa_busywait(rq, cs);
  
  	rq->tail = intel_ring_offset(rq, cs);

assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8769760257fd9..f08739d020332 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -584,7 +584,7 @@ struct intel_engine_cs {
  #define I915_ENGINE_HAS_RCS_REG_STATE  BIT(9)
  #define I915_ENGINE_HAS_EU_PRIORITYBIT(10)
  #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
-#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
+#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12)
unsigned int flags;
  
  	/*

@@ -694,10 +694,11 @@ intel_engine_has_relative_mmio(const struct 
intel_engine_cs * const engine)
  }
  
  /* Wa_14014475959:dg2 */

+/* Wa_16019325821 */
  static inline bool
-intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
  {
-   return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+   return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
  }
  
  #endif /* __INTEL_ENGINE_TYPES_H__ */

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3f3df1166b860..0e6c160de3315 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -294,6 +294,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
  
+	/* Wa_16019325821 */

+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+   flags |= GUC_WA_RCS_CCS_SWITCHOUT;
+
/*
 * Wa_14012197797
 * Wa_22011391025
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 8ae1846431da7..48863188a130e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -96,8 +96,9 @@
  #define   GUC_WA_GAM_CREDITS  BIT(10)
  #define   GUC_WA_DUAL_QUEUE   BIT(11)
  #define   GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
-#define   GUC_WA_CONTEXT_ISOLATION BIT(15)
  #define   GUC_WA_PRE_PARSER   BIT(14)
+#define   

Re: [Intel-gfx] [PATCH v2 3/4] drm/i915/guc: Enable Wa_14019159160

2023-12-13 Thread Belgaumkar, Vinay



On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

Use the new w/a KLV support to enable a MTL w/a. Note, this w/a is a
super-set of Wa_16019325821, so requires turning that one as well as
setting the new flag for Wa_14019159160 itself.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/gen8_engine_cs.c  |  3 ++
  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  1 +
  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h |  7 
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 34 ++-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  1 +
  6 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 9cccd60a5c41d..359b21fb02ab2 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -744,6 +744,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request 
*rq, u32 *cs)
  
  /* Wa_14014475959:dg2 */

  /* Wa_16019325821 */
+/* Wa_14019159160 */
  #define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET0x540
  static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
  {
@@ -753,6 +754,7 @@ static u32 hold_switchout_semaphore_offset(struct 
i915_request *rq)
  
  /* Wa_14014475959:dg2 */

  /* Wa_16019325821 */
+/* Wa_14019159160 */
  static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
  {
int i;
@@ -793,6 +795,7 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, 
u32 *cs)
  
  	/* Wa_14014475959:dg2 */

/* Wa_16019325821 */
+   /* Wa_14019159160 */
if (intel_engine_uses_wa_hold_switchout(rq->engine))
cs = hold_switchout_emit_wa_busywait(rq, cs);
  
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index f08739d020332..3b4993955a4b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -695,6 +695,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs 
* const engine)
  
  /* Wa_14014475959:dg2 */

  /* Wa_16019325821 */
+/* Wa_14019159160 */
  static inline bool
  intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
  {
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 58012edd4eb0e..bebf28e3c4794 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -101,4 +101,11 @@ enum {
GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
  };
  
+/*

+ * Workaround keys:
+ */
+enum {
+   GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE   = 
0x9001,
+};
+
  #endif /* _ABI_GUC_KLVS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 0e6c160de3315..6252f32d67011 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -295,6 +295,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
  
  	/* Wa_16019325821 */

+   /* Wa_14019159160 */
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
flags |= GUC_WA_RCS_CCS_SWITCHOUT;
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c

index 251e7a7a05cb8..8f7298cbbc322 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -810,6 +810,25 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
  }
  
+/* Wa_14019159160 */

+static u32 guc_waklv_ra_mode(struct intel_guc *guc, u32 offset, u32 remain)
+{
+   u32 size;
+   u32 klv_entry[] = {
+   /* 16:16 key/length */
+   FIELD_PREP(GUC_KLV_0_KEY, 
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE) |
+   FIELD_PREP(GUC_KLV_0_LEN, 0),
+   /* 0 dwords data */
+   };
+
+   size = sizeof(klv_entry);
+   GEM_BUG_ON(remain < size);
+
+   iosys_map_memcpy_to(>ads_map, offset, klv_entry, size);
+
+   return size;
+}
+
  static void guc_waklv_init(struct intel_guc *guc)
  {
struct intel_gt *gt = guc_to_gt(guc);
@@ -825,15 +844,12 @@ static void guc_waklv_init(struct intel_guc *guc)
offset = guc_ads_waklv_offset(guc);
remain = guc_ads_waklv_size(guc);
  
-	/*

-* Add workarounds here:
-*
-* if (want_wa_) {
-*  size = guc_waklv_(guc, offset, remain);
-*  offset += size;
-*  remain -= size;
-* }
-*/
+   /* Wa_14019159160 */
+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+   size = guc_waklv_ra_mode(guc, offset, remain);
+   offset += size;
+   remain -= size;
+   }
  
  	size = 

Re: [Intel-gfx] [PATCH v2 2/4] drm/i915/guc: Add support for w/a KLVs

2023-12-13 Thread Belgaumkar, Vinay



On 10/27/2023 2:18 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

To prevent running out of bits, new w/a enable flags are being added
via a KLV system instead of a 32 bit flags word.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 73 ++-
  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  6 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  5 +-
  5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index dabeaf4f245f3..00d6402333f8e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -36,6 +36,7 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+   INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR= 0x75,
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
  
  	INTEL_GUC_LOAD_STATUS_READY= 0xF0,

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8f2a..4113776ff3e19 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -198,6 +198,8 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+   /** @ads_waklv_size: size of workaround KLVs */
+   u32 ads_waklv_size;
/** @ads_capture_size: size of register lists in the ADS used for error 
capture */
u32 ads_capture_size;
/** @ads_engine_usage_size: size of engine usage in the ADS */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 63724e17829a7..251e7a7a05cb8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -46,6 +46,10 @@
   *  +---+
   *  | padding   |
   *  +---+ <== 4K aligned
+ *  | w/a KLVs  |
+ *  +---+
+ *  | padding   |
+ *  +---+ <== 4K aligned
   *  | capture lists |
   *  +---+
   *  | padding   |
@@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
  }
  
+static u32 guc_ads_waklv_size(struct intel_guc *guc)

+{
+   return PAGE_ALIGN(guc->ads_waklv_size);
+}
+
  static u32 guc_ads_capture_size(struct intel_guc *guc)
  {
return PAGE_ALIGN(guc->ads_capture_size);
@@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
  }
  
-static u32 guc_ads_capture_offset(struct intel_guc *guc)

+static u32 guc_ads_waklv_offset(struct intel_guc *guc)
  {
u32 offset;
  
@@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc)

return PAGE_ALIGN(offset);
  }
  
+static u32 guc_ads_capture_offset(struct intel_guc *guc)

+{
+   u32 offset;
+
+   offset = guc_ads_waklv_offset(guc) +
+guc_ads_waklv_size(guc);
+
+   return PAGE_ALIGN(offset);
+}
+
  static u32 guc_ads_private_data_offset(struct intel_guc *guc)
  {
u32 offset;
@@ -791,6 +810,49 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
  }
  
+static void guc_waklv_init(struct intel_guc *guc)

+{
+   struct intel_gt *gt = guc_to_gt(guc);
+   u32 offset, addr_ggtt, remain, size;
+
+   if (!intel_uc_uses_guc_submission(>uc))
+   return;
+
+   if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
+   return;
+
+   GEM_BUG_ON(iosys_map_is_null(>ads_map));
+   offset = guc_ads_waklv_offset(guc);
+   remain = guc_ads_waklv_size(guc);
+
+   /*
+* Add workarounds here:
+*
+* if (want_wa_) {
+*  size = guc_waklv_(guc, offset, remain);
+*  offset += size;
+*  remain -= size;
+* }
+*/
+
+   size = guc_ads_waklv_size(guc) - remain;
+   if (!size)
+   return;
+
+   offset = guc_ads_waklv_offset(guc);
+   addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+
+   ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
+   ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
+   ads_blob_write(guc, 

Re: [Intel-gfx] [PATCH] drm/i915: Read a shadowed mmio register for ggtt flush

2023-11-09 Thread Belgaumkar, Vinay



On 11/9/2023 12:35 PM, Ville Syrjälä wrote:

On Thu, Nov 09, 2023 at 12:01:26PM -0800, Belgaumkar, Vinay wrote:

On 11/9/2023 11:30 AM, Ville Syrjälä wrote:

On Thu, Nov 09, 2023 at 11:21:48AM -0800, Vinay Belgaumkar wrote:

We read RENDER_HEAD as a part of the flush. If GT is in
deeper sleep states, this could lead to read errors since we are
not using a forcewake. Safer to read a shadowed register instead.

IIRC shadowing is only thing for writes, not reads.

Sure, but reading from a shadowed register does return the cached value

Does it? I suppose that would make some sense, but I don't recall that
ever being stated anywhere. At least before the shadow registers
existed reads would just give you zeroes when not awake.


(even though we don't care about the value here). When GT is in deeper 
sleep states, it is better to read a shadowed (cached) value instead of
trying to attempt an mmio register read without a force wake anyways.

So you're saying reads from non-shadowed registers fails somehow
when not awake? How exactly do they fail? And when reading from
a shadowed register that failure never happens?


We could hit problems like the one being addressed here - 
https://patchwork.freedesktop.org/series/125356/.  Reading from a 
shadowed register will avoid any needless references(without a wake) to 
the MMIO space. Shouldn't hurt to make this change for all gens IMO.


Thanks,

Vinay.




Thanks,

Vinay.


Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index ed32bf5b1546..ea814ea5f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
   
   		spin_lock_irqsave(>lock, flags);

intel_uncore_posting_read_fw(uncore,
-RING_HEAD(RENDER_RING_BASE));
+RING_TAIL(RENDER_RING_BASE));
spin_unlock_irqrestore(>lock, flags);
}
   }
--
2.38.1


Re: [Intel-gfx] [PATCH] drm/i915: Read a shadowed mmio register for ggtt flush

2023-11-09 Thread Belgaumkar, Vinay



On 11/9/2023 11:30 AM, Ville Syrjälä wrote:

On Thu, Nov 09, 2023 at 11:21:48AM -0800, Vinay Belgaumkar wrote:

We read RENDER_HEAD as a part of the flush. If GT is in
deeper sleep states, this could lead to read errors since we are
not using a forcewake. Safer to read a shadowed register instead.

IIRC shadowing is only thing for writes, not reads.


Sure, but reading from a shadowed register does return the cached value 
(even though we don't care about the vakue here). When GT is in deeper 
sleep states, it is better to read a shadowed (cached) value instead of 
trying to attempt an mmio register read without a force wake anyways.


Thanks,

Vinay.




Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index ed32bf5b1546..ea814ea5f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
  
  		spin_lock_irqsave(>lock, flags);

intel_uncore_posting_read_fw(uncore,
-RING_HEAD(RENDER_RING_BASE));
+RING_TAIL(RENDER_RING_BASE));
spin_unlock_irqrestore(>lock, flags);
}
  }
--
2.38.1


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Don't set PIPE_CONTROL_FLUSH_L3

2023-10-16 Thread Belgaumkar, Vinay



On 10/16/2023 4:24 PM, John Harrison wrote:

On 10/16/2023 15:55, Vinay Belgaumkar wrote:

This bit does not cause an explicit L3 flush. We already use
At all? Or only on newer hardware? And as a genuine spec change or as 
a bug / workaround?


If the hardware has re-purposed the bit then it is probably worth at 
least adding a comment to the bit definition to say that it is only 
valid up to IP version 12.70.
At this point, this is a bug on MTL since this bit is not related to L3 
flushes as per spec. Regarding older platforms, still checking the 
reason why this was added (i.e if it fixed something and will regress if 
removed). If not, we can extend the change for others as well in a 
separate patch. On older platforms, this bit seems to cause an implicit 
flush at best.



PIPE_CONTROL_DC_FLUSH_ENABLE for that purpose.

Cc: Nirmoy Das 
Cc: Mikka Kuoppala 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c

index ba4c2422b340..abbc02f3e66e 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -247,6 +247,7 @@ static int mtl_dummy_pipe_control(struct 
i915_request *rq)

  int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
  {
  struct intel_engine_cs *engine = rq->engine;
+    struct intel_gt *gt = rq->engine->gt;
    /*
   * On Aux CCS platforms the invalidation of the Aux
@@ -278,7 +279,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, 
u32 mode)

   * deals with Protected Memory which is not needed for
   * AUX CCS invalidation and lead to unwanted side effects.
   */
-    if (mode & EMIT_FLUSH)
+    if ((mode & EMIT_FLUSH) &&
+    !(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71
Why stop at 12.71? Is the meaning only changed for 12.70 and the 
old/correct version will be restored in later hardware?


Was trying to keep this limited to MTL for now until the above 
statements are verified.


Thanks,

Vinay.



John.



  bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
    bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
@@ -812,12 +814,14 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct 
i915_request *rq, u32 *cs)

  u32 flags = (PIPE_CONTROL_CS_STALL |
   PIPE_CONTROL_TLB_INVALIDATE |
   PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_FLUSH_L3 |
   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   PIPE_CONTROL_DC_FLUSH_ENABLE |
   PIPE_CONTROL_FLUSH_ENABLE);
  +    if (!(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71
+    flags |= PIPE_CONTROL_FLUSH_L3;
+
  /* Wa_14016712196 */
  if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || 
IS_DG2(i915))

  /* dummy PIPE_CONTROL + depth flush */




Re: [Intel-gfx] [PATCH 3/4] drm/i915/guc: Add support for w/a KLVs

2023-10-06 Thread Belgaumkar, Vinay



On 9/15/2023 2:55 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

To prevent running out of bits, new w/a enable flags are being added
via a KLV system instead of a 32 bit flags word.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  3 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 64 ++-
  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  6 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  5 +-
  5 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index dabeaf4f245f3..00d6402333f8e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -36,6 +36,7 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+   INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR= 0x75,
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
  
  	INTEL_GUC_LOAD_STATUS_READY= 0xF0,

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 6c392bad29c19..3b1fc5f96306b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -186,6 +186,8 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+   /** @ads_waklv_size: size of workaround KLVs */
+   u32 ads_waklv_size;
/** @ads_capture_size: size of register lists in the ADS used for error 
capture */
u32 ads_capture_size;
/** @ads_engine_usage_size: size of engine usage in the ADS */
@@ -295,6 +297,7 @@ struct intel_guc {
  #define MAKE_GUC_VER(maj, min, pat)   (((maj) << 16) | ((min) << 8) | (pat))
  #define MAKE_GUC_VER_STRUCT(ver)  MAKE_GUC_VER((ver).major, (ver).minor, 
(ver).patch)
  #define GUC_SUBMIT_VER(guc)   
MAKE_GUC_VER_STRUCT((guc)->submission_version)
+#define GUC_FIRMWARE_VER(guc)  
MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver)
  
  static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)

  {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 63724e17829a7..792910af3a481 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -46,6 +46,10 @@
   *  +---+
   *  | padding   |
   *  +---+ <== 4K aligned
+ *  | w/a KLVs  |
+ *  +---+
+ *  | padding   |
+ *  +---+ <== 4K aligned
   *  | capture lists |
   *  +---+
   *  | padding   |
@@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
  }
  
+static u32 guc_ads_waklv_size(struct intel_guc *guc)

+{
+   return PAGE_ALIGN(guc->ads_waklv_size);
+}
+
  static u32 guc_ads_capture_size(struct intel_guc *guc)
  {
return PAGE_ALIGN(guc->ads_capture_size);
@@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
  }
  
-static u32 guc_ads_capture_offset(struct intel_guc *guc)

+static u32 guc_ads_waklv_offset(struct intel_guc *guc)
  {
u32 offset;
  
@@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc)

return PAGE_ALIGN(offset);
  }
  
+static u32 guc_ads_capture_offset(struct intel_guc *guc)

+{
+   u32 offset;
+
+   offset = guc_ads_waklv_offset(guc) +
+guc_ads_waklv_size(guc);
+
+   return PAGE_ALIGN(offset);
+}
+
  static u32 guc_ads_private_data_offset(struct intel_guc *guc)
  {
u32 offset;
@@ -791,6 +810,40 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
  }
  
+static void guc_waklv_init(struct intel_guc *guc)

+{
+   struct intel_gt *gt = guc_to_gt(guc);
+   u32 offset, addr_ggtt, remain, size;
+
+   if (!intel_uc_uses_guc_submission(>uc))
+   return;
+
+   if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
+   return;

should this be <= ?

+
+   GEM_BUG_ON(iosys_map_is_null(>ads_map));
+   offset = guc_ads_waklv_offset(guc);
+   remain = guc_ads_waklv_size(guc);
+
+   /* Add workarounds here */
+

extra blank line?

+   size = guc_ads_waklv_size(guc) - remain;

Re: [Intel-gfx] [PATCH 2/4] drm/i915: Enable Wa_16019325821

2023-10-06 Thread Belgaumkar, Vinay



On 9/15/2023 2:55 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

Some platforms require holding RCS context switches until CCS is idle
(the reverse w/a of Wa_14014475959). Some platforms require both
versions.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/gen8_engine_cs.c  | 19 +++
  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  7 ---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  4 
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  3 ++-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 +++-
  5 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 0143445dba830..8b494825c55f2 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -733,21 +733,23 @@ static u32 *gen12_emit_preempt_busywait(struct 
i915_request *rq, u32 *cs)
  }
  
  /* Wa_14014475959:dg2 */

-#define CCS_SEMAPHORE_PPHWSP_OFFSET0x540
-static u32 ccs_semaphore_offset(struct i915_request *rq)
+/* Wa_16019325821 */
+#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
  {
return i915_ggtt_offset(rq->context->state) +
-   (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+   (LRC_PPHWSP_PN * PAGE_SIZE) + 
HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
  }
  
  /* Wa_14014475959:dg2 */

-static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+/* Wa_16019325821 */
+static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
  {
int i;
  
  	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |

MI_ATOMIC_MOVE;
-   *cs++ = ccs_semaphore_offset(rq);
+   *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;
  
@@ -763,7 +765,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)

MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
-   *cs++ = ccs_semaphore_offset(rq);
+   *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
  
  	return cs;

@@ -780,8 +782,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, 
u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);
  
  	/* Wa_14014475959:dg2 */

-   if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
-   cs = ccs_emit_wa_busywait(rq, cs);
+   /* Wa_16019325821 */
+   if (intel_engine_uses_wa_hold_switchout(rq->engine))
+   cs = hold_switchout_emit_wa_busywait(rq, cs);
  
  	rq->tail = intel_ring_offset(rq, cs);

assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a7e6775980043..68fe1cef9cd94 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -573,7 +573,7 @@ struct intel_engine_cs {
  #define I915_ENGINE_HAS_RCS_REG_STATE  BIT(9)
  #define I915_ENGINE_HAS_EU_PRIORITYBIT(10)
  #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
-#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
+#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12)
unsigned int flags;
  
  	/*

@@ -683,10 +683,11 @@ intel_engine_has_relative_mmio(const struct 
intel_engine_cs * const engine)
  }
  
  /* Wa_14014475959:dg2 */

+/* Wa_16019325821 */
  static inline bool
-intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
  {
-   return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+   return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
  }
  
  #endif /* __INTEL_ENGINE_TYPES_H__ */

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 27df41c53b890..4001679ba0793 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -294,6 +294,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
  
+	/* Wa_16019325821 */

+   if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+   flags |= GUC_WA_RCS_CCS_SWITCHOUT;
+
/*
 * Wa_14012197797
 * Wa_22011391025
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b4d56eccfb1f0..f97af0168a66b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -95,8 +95,9 @@
  #define   GUC_WA_GAM_CREDITS  BIT(10)
  #define   GUC_WA_DUAL_QUEUE   BIT(11)
  #define   GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
-#define   GUC_WA_CONTEXT_ISOLATION BIT(15)
  #define   GUC_WA_PRE_PARSER   BIT(14)
+#define   

Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Enable WA 14018913170

2023-10-05 Thread Belgaumkar, Vinay



On 9/14/2023 3:28 PM, john.c.harri...@intel.com wrote:

From: Daniele Ceraolo Spurio 

The GuC handles the WA, the KMD just needs to set the flag to enable
it on the appropriate platforms.

Signed-off-by: John Harrison 
Signed-off-by: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c  | 6 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.h  | 1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 +
  3 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 27df41c53b890..3f3df1166b860 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -319,6 +319,12 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
if (!RCS_MASK(gt))
flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
  
+	/* Wa_14018913170 */

+   if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) {
+   if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || 
IS_PONTEVECCHIO(gt->i915))
+   flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
+   }
+
return flags;


LGTM,

Reviewed-by: Vinay Belgaumkar 


  }
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h

index 6c392bad29c19..818c8c146fd47 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -295,6 +295,7 @@ struct intel_guc {
  #define MAKE_GUC_VER(maj, min, pat)   (((maj) << 16) | ((min) << 8) | (pat))
  #define MAKE_GUC_VER_STRUCT(ver)  MAKE_GUC_VER((ver).major, (ver).minor, 
(ver).patch)
  #define GUC_SUBMIT_VER(guc)   
MAKE_GUC_VER_STRUCT((guc)->submission_version)
+#define GUC_FIRMWARE_VER(guc)  
MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver)
  
  static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)

  {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b4d56eccfb1f0..123ad75d2eb28 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -100,6 +100,7 @@
  #define   GUC_WA_HOLD_CCS_SWITCHOUT   BIT(17)
  #define   GUC_WA_POLLCS   BIT(18)
  #define   GUC_WA_RCS_REGS_IN_CCS_REGS_LISTBIT(21)
+#define   GUC_WA_ENABLE_TSC_CHECK_ON_RC6   BIT(22)
  
  #define GUC_CTL_FEATURE			2

  #define   GUC_CTL_ENABLE_SLPC BIT(2)


Re: [Intel-gfx] [PATCH] drm/i915/gem: Allow users to disable waitboost

2023-09-27 Thread Belgaumkar, Vinay



On 9/21/2023 3:41 AM, Tvrtko Ursulin wrote:


On 20/09/2023 22:56, Vinay Belgaumkar wrote:

Provide a bit to disable waitboost while waiting on a gem object.
Waitboost results in increased power consumption by requesting RP0
while waiting for the request to complete. Add a bit in the gem_wait()
IOCTL where this can be disabled.

This is related to the libva API change here -
Link: 
https://github.com/XinfengZhang/libva/commit/3d90d18c67609a73121bb71b20ee4776b54b61a7


This link does not appear to lead to userspace code using this uapi?

We have asked Carl (cc'd) to post a patch for the same.




Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 9 ++---
  drivers/gpu/drm/i915/i915_request.c  | 3 ++-
  drivers/gpu/drm/i915/i915_request.h  | 1 +
  include/uapi/drm/i915_drm.h  | 1 +
  4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
b/drivers/gpu/drm/i915/gem/i915_gem_wait.c

index d4b918fb11ce..955885ec859d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -72,7 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv 
*resv,

  struct dma_fence *fence;
  long ret = timeout ?: 1;
  -    i915_gem_object_boost(resv, flags);
+    if (!(flags & I915_WAITBOOST_DISABLE))
+    i915_gem_object_boost(resv, flags);
    dma_resv_iter_begin(, resv,
  dma_resv_usage_rw(flags & I915_WAIT_ALL));
@@ -236,7 +237,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void 
*data, struct drm_file *file)

  ktime_t start;
  long ret;
  -    if (args->flags != 0)
+    if (args->flags != 0 || args->flags != I915_GEM_WAITBOOST_DISABLE)
  return -EINVAL;
    obj = i915_gem_object_lookup(file, args->bo_handle);
@@ -248,7 +249,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void 
*data, struct drm_file *file)

  ret = i915_gem_object_wait(obj,
 I915_WAIT_INTERRUPTIBLE |
 I915_WAIT_PRIORITY |
-   I915_WAIT_ALL,
+   I915_WAIT_ALL |
+   (args->flags & I915_GEM_WAITBOOST_DISABLE ?
+    I915_WAITBOOST_DISABLE : 0),
 to_wait_timeout(args->timeout_ns));
    if (args->timeout_ns > 0) {
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c

index f59081066a19..2957409b4b2a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -2044,7 +2044,8 @@ long i915_request_wait_timeout(struct 
i915_request *rq,

   * but at a cost of spending more power processing the workload
   * (bad for battery).
   */
-    if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq))
+    if (!(flags & I915_WAITBOOST_DISABLE) && (flags & 
I915_WAIT_PRIORITY) &&

+    !i915_request_started(rq))
  intel_rps_boost(rq);
    wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h

index 0ac55b2e4223..3cc00e8254dc 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -445,6 +445,7 @@ long i915_request_wait(struct i915_request *rq,
  #define I915_WAIT_INTERRUPTIBLE    BIT(0)
  #define I915_WAIT_PRIORITY    BIT(1) /* small priority bump for the 
request */
  #define I915_WAIT_ALL    BIT(2) /* used by 
i915_gem_object_wait() */
+#define I915_WAITBOOST_DISABLE    BIT(3) /* used by 
i915_gem_object_wait() */

    void i915_request_show(struct drm_printer *m,
 const struct i915_request *rq,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7000e5910a1d..4adee70e39cf 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1928,6 +1928,7 @@ struct drm_i915_gem_wait {
  /** Handle of BO we shall wait on */
  __u32 bo_handle;
  __u32 flags;
+#define I915_GEM_WAITBOOST_DISABLE  (1u<<0)


Probably would be good to avoid mentioning waitboost in the uapi since 
so far it wasn't an explicit feature/contract. Something like 
I915_GEM_WAIT_BACKGROUND_PRIORITY? Low priority?

sure.


I also wonder if there could be a possible angle to help Rob (+cc) 
upstream the syncobj/fence deadline code if our media driver might 
make use of that somehow.


Like if either we could wire up the deadline into GEM_WAIT (in a 
backward compatible manner), or if media could use sync fd wait 
instead. Assuming they have an out fence already, which may not be true.


Makes sense. We could add a SET_DEADLINE flag or something similar and 
pass in the deadline when appropriate.


Thanks,

Vinay.



Regards,

Tvrtko


  /** Number of nanoseconds to wait, Returns time remaining. */
  __s64 timeout_ns;
  };


Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier

2023-07-21 Thread Belgaumkar, Vinay



On 7/21/2023 3:08 PM, Belgaumkar, Vinay wrote:


On 7/21/2023 2:23 PM, Rodrigo Vivi wrote:

On Fri, Jul 21, 2023 at 01:44:34PM -0700, Belgaumkar, Vinay wrote:

On 7/21/2023 1:41 PM, Rodrigo Vivi wrote:

On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote:

This should be done before the soft min/max frequencies are restored.
When we disable the "Ignore efficient frequency" flag, GuC does not
actually bring the requested freq down to RPn.

Specifically, this scenario-

- ignore efficient freq set to true
- reduce min to RPn (from efficient)
- suspend
- resume (includes GuC load, restore soft min/max, restore 
efficient freq)

- validate min freq has been restored to RPn

This will fail if we didn't first restore(disable, in this case) 
efficient

freq flag before setting the soft min frequency.
that's strange. so guc is returning the rpe when we request the min 
freq

during the soft config?

we could alternatively change the soft config to actually get the min
and not be tricked by this.

But also the patch below doesn't hurt.

Reviewed-by: Rodrigo Vivi 
(Although I'm still curious and want to understand exactly why
the soft min gets messed up when we don't tell guc to ignore the
efficient freq beforehand. Please help me to understand.)
The soft min does not get messed up, but GuC keeps requesting RPe 
even after

disabling efficient freq. (unless we manually set min freq to RPn AFTER
disabling efficient).
so it looks to me that the right solution would be to ensure that 
every time
that we disable the efficient freq we make sure to also set the min 
freq to RPn,

no?!


Hmm, may not be applicable every time. What if someone disables 
efficient frequency while running a workload or with frequency fixed 
to 800, for example?


I'll take that back, it should not matter. GuC will not change its 
request just because we switched min lower. I will resend the patch with 
the min setting as well.


Thanks,

Vinay.



Thanks,

Vinay.




Thanks,

Vinay.




Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736
Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for 
efficient freq")

Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index ee9f83af7cf6..f16dff7c3185 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct 
intel_guc_slpc *slpc)

   intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc));
+    /* Set cached value of ignore efficient freq */
+    intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
   slpc_get_rp_values(slpc);
   /* Handle the case where min=max=RPmax */
@@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct 
intel_guc_slpc *slpc)

   /* Set cached media freq ratio mode */
   intel_guc_slpc_set_media_ratio_mode(slpc, 
slpc->media_ratio_mode);

-    /* Set cached value of ignore efficient freq */
-    intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
   return 0;
   }
--
2.38.1



Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier

2023-07-21 Thread Belgaumkar, Vinay



On 7/21/2023 2:23 PM, Rodrigo Vivi wrote:

On Fri, Jul 21, 2023 at 01:44:34PM -0700, Belgaumkar, Vinay wrote:

On 7/21/2023 1:41 PM, Rodrigo Vivi wrote:

On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote:

This should be done before the soft min/max frequencies are restored.
When we disable the "Ignore efficient frequency" flag, GuC does not
actually bring the requested freq down to RPn.

Specifically, this scenario-

- ignore efficient freq set to true
- reduce min to RPn (from efficient)
- suspend
- resume (includes GuC load, restore soft min/max, restore efficient freq)
- validate min freq has been restored to RPn

This will fail if we didn't first restore(disable, in this case) efficient
freq flag before setting the soft min frequency.

that's strange. so guc is returning the rpe when we request the min freq
during the soft config?

we could alternatively change the soft config to actually get the min
and not be tricked by this.

But also the patch below doesn't hurt.

Reviewed-by: Rodrigo Vivi 
(Although I'm still curious and want to understand exactly why
the soft min gets messed up when we don't tell guc to ignore the
efficient freq beforehand. Please help me to understand.)

The soft min does not get messed up, but GuC keeps requesting RPe even after
disabling efficient freq. (unless we manually set min freq to RPn AFTER
disabling efficient).

so it looks to me that the right solution would be to ensure that every time
that we disable the efficient freq we make sure to also set the min freq to RPn,
no?!


Hmm, may not be applicable every time. What if someone disables 
efficient frequency while running a workload or with frequency fixed to 
800, for example?


Thanks,

Vinay.




Thanks,

Vinay.




Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736
Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq")
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ee9f83af7cf6..f16dff7c3185 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc));
+   /* Set cached value of ignore efficient freq */
+   intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
slpc_get_rp_values(slpc);
/* Handle the case where min=max=RPmax */
@@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
-   /* Set cached value of ignore efficient freq */
-   intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
return 0;
   }
--
2.38.1



Re: [PATCH] drm/i915/guc/slpc: Restore efficient freq earlier

2023-07-21 Thread Belgaumkar, Vinay



On 7/21/2023 1:41 PM, Rodrigo Vivi wrote:

On Fri, Jul 21, 2023 at 11:03:49AM -0700, Vinay Belgaumkar wrote:

This should be done before the soft min/max frequencies are restored.
When we disable the "Ignore efficient frequency" flag, GuC does not
actually bring the requested freq down to RPn.

Specifically, this scenario-

- ignore efficient freq set to true
- reduce min to RPn (from efficient)
- suspend
- resume (includes GuC load, restore soft min/max, restore efficient freq)
- validate min freq has been restored to RPn

This will fail if we didn't first restore (disable, in this case) the efficient
freq flag before setting the soft min frequency.

that's strange. so guc is returning the rpe when we request the min freq
during the soft config?

we could alternatively change the soft config to actually get the min
and not be tricked by this.

But also the patch below doesn't hurt.

Reviewed-by: Rodrigo Vivi 
(Although I'm still curious and want to understand exactly why
the soft min gets messed up when we don't tell guc to ignore the
efficient freq beforehand. Please help me to understand.)


The soft min does not get messed up, but GuC keeps requesting RPe even 
after disabling efficient freq. (unless we manually set min freq to RPn 
AFTER disabling efficient).


Thanks,

Vinay.





Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736
Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq")
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ee9f83af7cf6..f16dff7c3185 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -743,6 +743,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  	intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc));
  
+	/* Set cached value of ignore efficient freq */

+   intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
slpc_get_rp_values(slpc);
  
  	/* Handle the case where min=max=RPmax */

@@ -765,9 +768,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
  
-	/* Set cached value of ignore efficient freq */

-   intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
return 0;
  }
  
--

2.38.1



Re: [Intel-gfx] [PATCH] drm/i915/guc: Dump perf_limit_reasons for debug

2023-06-27 Thread Belgaumkar, Vinay



On 6/26/2023 11:43 PM, Dixit, Ashutosh wrote:

On Mon, 26 Jun 2023 21:02:14 -0700, Belgaumkar, Vinay wrote:


On 6/26/2023 8:17 PM, Dixit, Ashutosh wrote:

On Mon, 26 Jun 2023 19:12:18 -0700, Vinay Belgaumkar wrote:

GuC load takes longer sometimes due to GT frequency not ramping up.
Add perf_limit_reasons to the existing warn print to see if frequency
is being throttled.

Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 364d0d546ec8..73911536a8e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -254,6 +254,8 @@ static int guc_wait_ucode(struct intel_guc *guc)
guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = 
%dMHz, status = 0x%08X, count = %d, ret = %d]\n",
 delta_ms, 
intel_rps_read_actual_frequency(>gt->rps),
 before_freq, status, count, ret);
+   guc_warn(guc, "perf limit reasons = 0x%08X\n",
+intel_uncore_read(uncore, 
intel_gt_perf_limit_reasons_reg(gt)));

Maybe just add at the end of the previous guc_warn?

Its already too long a line. If I try adding on the next line checkpatch
complains about splitting double quotes.

In these cases of long quoted lines we generally ignore checkpatch. Because
perf limit reasons is part of the "excessive init time" message it should
be on the same line within the square brackets. So should not be
splitting double quotes.

Another idea would be something like this:

guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = 
%dMHz, status = 0x%08X]\n",
 delta_ms, 
intel_rps_read_actual_frequency(>gt->rps),
 before_freq, status);
guc_warn(guc, "excessive init time: [count = %d, ret = %d, perf 
limit reasons = 0x%08X]\n",
 count, ret, intel_uncore_read(uncore, 
intel_gt_perf_limit_reasons_reg(gt)));


ok, I will split it based on freq and non-freq based debug.

Thanks,

Vinay.



Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915/guc: Dump perf_limit_reasons for debug

2023-06-26 Thread Belgaumkar, Vinay



On 6/26/2023 8:17 PM, Dixit, Ashutosh wrote:

On Mon, 26 Jun 2023 19:12:18 -0700, Vinay Belgaumkar wrote:

GuC load takes longer sometimes due to GT frequency not ramping up.
Add perf_limit_reasons to the existing warn print to see if frequency
is being throttled.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 364d0d546ec8..73911536a8e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -254,6 +254,8 @@ static int guc_wait_ucode(struct intel_guc *guc)
guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = 
%dMHz, status = 0x%08X, count = %d, ret = %d]\n",
 delta_ms, 
intel_rps_read_actual_frequency(>gt->rps),
 before_freq, status, count, ret);
+   guc_warn(guc, "perf limit reasons = 0x%08X\n",
+intel_uncore_read(uncore, 
intel_gt_perf_limit_reasons_reg(gt)));

Maybe just add at the end of the previous guc_warn?


Its already too long a line. If I try adding on the next line checkpatch 
complains about splitting double quotes.


Thanks,

Vinay.




} else {
guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status 
= 0x%08X, count = %d, ret = %d\n",
delta_ms, 
intel_rps_read_actual_frequency(>gt->rps),
--
2.38.1



Re: [PATCH] drm/i915/guc/slpc: Apply min softlimit correctly

2023-06-14 Thread Belgaumkar, Vinay



On 6/13/2023 7:25 PM, Dixit, Ashutosh wrote:

On Fri, 09 Jun 2023 15:02:52 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


We were skipping when min_softlimit was equal to RPn. We need to apply
it rergardless as efficient frequency will push the SLPC min to RPe.

regardless


This will break scenarios where user sets a min softlimit < RPe before
reset and then performs a GT reset.

Can you explain the reason for the patch clearly in terms of variables in
the code, what variable has what value and what is the bug. I am not
following from the above description.


Hi Ashutosh,

Scenario being fixed here is exactly the one in i915_pm_freq_api 
reset/suspend subtests (currently in review). Test sets min freq to RPn 
and then performs a reset. It then checks if cur_freq is RPn.


Here's the sequence that shows the problem-

RPLS:/home/gta# modprobe i915
RPLS:/home/gta# echo 1 > /sys/class/drm/card0/gt/gt0/slpc_ignore_eff_freq
RPLS:/home/gta# echo 300 > /sys/class/drm/card0/gt_min_freq_mhz (RPn)
RPLS:/home/gta# cat /sys/class/drm/card0/gt_cur_freq_mhz --> cur == RPn 
as expected

300
RPLS:/home/gta# echo 1 > /sys/kernel/debug/dri/0/gt0/reset --> reset
RPLS:/home/gta# cat /sys/class/drm/card0/gt_min_freq_mhz --> shows the 
internal cached variable correctly

300
RPLS:/home/gta# cat /sys/class/drm/card0/gt_cur_freq_mhz --> actual freq 
being requested by SLPC (it's not RPn!!)

700

We need to sync up driver min freq value and SLPC min after a 
reset/suspend. Currently, we skip if the user had manually set min to 
RPn (this was an optimization we had before we enabled efficient freq 
usage).


Thanks,

Vinay.



Thanks.
--
Ashutosh



Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient frequency")

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 01b75529311c..ee9f83af7cf6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -606,7 +606,7 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
if (unlikely(ret))
return ret;
slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
-   } else if (slpc->min_freq_softlimit != slpc->min_freq) {
+   } else {
return intel_guc_slpc_set_min_freq(slpc,
   slpc->min_freq_softlimit);
}
--
2.38.1



Re: [Intel-gfx] [PATCH] drm/i915/pxp/mtl: intel_pxp_init_hw needs runtime-pm inside pm-complete

2023-06-13 Thread Belgaumkar, Vinay



On 6/1/2023 8:59 AM, Alan Previn wrote:

In the case of failed suspend flow or cases where the kernel does not go
into full suspend but goes from suspend_prepare back to resume_complete,
we get called for a pm_complete but without runtime_pm guaranteed.

Thus, ensure we take the runtime_pm when calling intel_pxp_init_hw
from within intel_pxp_resume_complete.


LGTM,

Reviewed-by: Vinay Belgaumkar 



Signed-off-by: Alan Previn 
---
  drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c 
b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
index 1a04067f61fc..1d184dcd63c7 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
@@ -36,6 +36,8 @@ void intel_pxp_suspend(struct intel_pxp *pxp)
  
  void intel_pxp_resume_complete(struct intel_pxp *pxp)

  {
+   intel_wakeref_t wakeref;
+
if (!intel_pxp_is_enabled(pxp))
return;
  
@@ -48,7 +50,8 @@ void intel_pxp_resume_complete(struct intel_pxp *pxp)

if (!HAS_ENGINE(pxp->ctrl_gt, GSC0) && !pxp->pxp_component)
return;
  
-	intel_pxp_init_hw(pxp);

+   with_intel_runtime_pm(>ctrl_gt->i915->runtime_pm, wakeref)
+   intel_pxp_init_hw(pxp);
  }
  
  void intel_pxp_runtime_suspend(struct intel_pxp *pxp)


base-commit: a66da4c33d8ede541aea9ba6d0d73b556a072d54


Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error

2023-05-16 Thread Belgaumkar, Vinay



On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

In the past, There have been sporadic CTB failures which proved hard
to reproduce manually. The most effective solution was to dump the GuC
log at the point of failure and let the CI system do the repro. It is
preferable not to dump the GuC log via dmesg for all issues as it is
not always necessary and is not helpful for end users. But rather than
trying to re-invent the code to do this each time it is wanted, commit
the code but for DEBUG_GUC builds only.

v2: Use IS_ENABLED for testing config options.


LGTM,

Reviewed-by: Vinay Belgaumkar 



Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 53 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 +++
  2 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 1803a633ed648..dc5cd712f1ff5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -13,6 +13,30 @@
  #include "intel_guc_ct.h"
  #include "intel_guc_print.h"
  
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)

+enum {
+   CT_DEAD_ALIVE = 0,
+   CT_DEAD_SETUP,
+   CT_DEAD_WRITE,
+   CT_DEAD_DEADLOCK,
+   CT_DEAD_H2G_HAS_ROOM,
+   CT_DEAD_READ,
+   CT_DEAD_PROCESS_FAILED,
+};
+
+static void ct_dead_ct_worker_func(struct work_struct *w);
+
+#define CT_DEAD(ct, reason)\
+   do { \
+   if (!(ct)->dead_ct_reported) { \
+   (ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \
+   queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \
+   } \
+   } while (0)
+#else
+#define CT_DEAD(ct, reason)do { } while (0)
+#endif
+
  static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
  {
return container_of(ct, struct intel_guc, ct);
@@ -93,6 +117,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
spin_lock_init(>requests.lock);
INIT_LIST_HEAD(>requests.pending);
INIT_LIST_HEAD(>requests.incoming);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+   INIT_WORK(>dead_ct_worker, ct_dead_ct_worker_func);
+#endif
INIT_WORK(>requests.worker, ct_incoming_request_worker_func);
tasklet_setup(>receive_tasklet, ct_receive_tasklet_func);
init_waitqueue_head(>wq);
@@ -319,11 +346,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
  
  	ct->enabled = true;

ct->stall_time = KTIME_MAX;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+   ct->dead_ct_reported = false;
+   ct->dead_ct_reason = CT_DEAD_ALIVE;
+#endif
  
  	return 0;
  
  err_out:

CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
+   CT_DEAD(ct, SETUP);
return err;
  }
  
@@ -434,6 +466,7 @@ static int ct_write(struct intel_guc_ct *ct,

  corrupted:
CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
 desc->head, desc->tail, desc->status);
+   CT_DEAD(ct, WRITE);
ctb->broken = true;
return -EPIPE;
  }
@@ -504,6 +537,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
  
+		CT_DEAD(ct, DEADLOCK);

ct->ctbs.send.broken = true;
}
  
@@ -552,6 +586,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)

 head, ctb->size);
desc->status |= GUC_CTB_STATUS_OVERFLOW;
ctb->broken = true;
+   CT_DEAD(ct, H2G_HAS_ROOM);
return false;
}
  
@@ -908,6 +943,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)

CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
 desc->head, desc->tail, desc->status);
ctb->broken = true;
+   CT_DEAD(ct, READ);
return -EPIPE;
  }
  
@@ -1057,6 +1093,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)

if (unlikely(err)) {
CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
 ERR_PTR(err), 4 * request->size, request->msg);
+   CT_DEAD(ct, PROCESS_FAILED);
ct_free_msg(request);
}
  
@@ -1233,3 +1270,19 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct,

drm_printf(p, "Tail: %u\n",
   ct->ctbs.recv.desc->tail);
  }
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+static void ct_dead_ct_worker_func(struct work_struct *w)
+{
+   struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, 
dead_ct_worker);
+   struct intel_guc *guc = ct_to_guc(ct);
+
+   if (ct->dead_ct_reported)
+   return;
+
+   ct->dead_ct_reported = true;
+
+   guc_info(guc, "CTB is dead - 

Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Dump error capture to kernel log

2023-05-16 Thread Belgaumkar, Vinay



On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

This is useful for getting debug information out in certain
situations, such as failing kernel selftests and CI runs that don't
log error captures. It is especially useful for things like retrieving
GuC logs as GuC operation can't be tracked by adding printk or ftrace
entries.

v2: Add CONFIG_DRM_I915_DEBUG_GEM wrapper (review feedback by Rodrigo).


Do the CI sparse warnings hold water? With that looked at,

LGTM,

Reviewed-by: Vinay Belgaumkar 



Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++
  drivers/gpu/drm/i915/i915_gpu_error.h |  10 ++
  2 files changed, 142 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index f020c0086fbcd..03d62c250c465 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct drm_i915_private 
*i915, int err)
i915->gpu_error.first_error = ERR_PTR(err);
spin_unlock_irq(>gpu_error.lock);
  }
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+   static int g_count;
+   struct drm_i915_private *i915 = gt->i915;
+   struct i915_gpu_coredump *error;
+   intel_wakeref_t wakeref;
+   size_t buf_size = PAGE_SIZE * 128;
+   size_t pos_err;
+   char *buf, *ptr, *next;
+   int l_count = g_count++;
+   int line = 0;
+
+   /* Can't allocate memory during a reset */
+   if (test_bit(I915_RESET_BACKOFF, >reset.flags)) {
+   drm_err(>i915->drm, "[Capture/%d.%d] Inside GT reset, skipping 
error capture :(\n",
+   l_count, line++);
+   return;
+   }
+
+   error = READ_ONCE(i915->gpu_error.first_error);
+   if (error) {
+   drm_err(>drm, "[Capture/%d.%d] Clearing existing error capture 
first...\n",
+   l_count, line++);
+   i915_reset_error_state(i915);
+   }
+
+   with_intel_runtime_pm(>runtime_pm, wakeref)
+   error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE);
+
+   if (IS_ERR(error)) {
+   drm_err(>drm, "[Capture/%d.%d] Failed to capture error 
capture: %ld!\n",
+   l_count, line++, PTR_ERR(error));
+   return;
+   }
+
+   buf = kvmalloc(buf_size, GFP_KERNEL);
+   if (!buf) {
+   drm_err(>drm, "[Capture/%d.%d] Failed to allocate buffer for 
error capture!\n",
+   l_count, line++);
+   i915_gpu_coredump_put(error);
+   return;
+   }
+
+   drm_info(>drm, "[Capture/%d.%d] Dumping i915 error capture for 
%ps...\n",
+l_count, line++, __builtin_return_address(0));
+
+   /* Largest string length safe to print via dmesg */
+#  define MAX_CHUNK800
+
+   pos_err = 0;
+   while (1) {
+   ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, 
pos_err, buf_size - 1);
+
+   if (got <= 0)
+   break;
+
+   buf[got] = 0;
+   pos_err += got;
+
+   ptr = buf;
+   while (got > 0) {
+   size_t count;
+   char tag[2];
+
+   next = strnchr(ptr, got, '\n');
+   if (next) {
+   count = next - ptr;
+   *next = 0;
+   tag[0] = '>';
+   tag[1] = '<';
+   } else {
+   count = got;
+   tag[0] = '}';
+   tag[1] = '{';
+   }
+
+   if (count > MAX_CHUNK) {
+   size_t pos;
+   char *ptr2 = ptr;
+
+   for (pos = MAX_CHUNK; pos < count; pos += 
MAX_CHUNK) {
+   char chr = ptr[pos];
+
+   ptr[pos] = 0;
+   drm_info(>drm, "[Capture/%d.%d] 
}%s{\n",
+l_count, line++, ptr2);
+   ptr[pos] = chr;
+   ptr2 = ptr + pos;
+
+   /*
+* If spewing large amounts of data via 
a serial console,
+* this can be a very slow process. So 
be friendly and try
+* not to cause 'softlockup on CPU' 
problems.
+*/
+   cond_resched();

Re: [Intel-gfx] [PATCH v2 0/2] Add support for dumping error captures via kernel logging

2023-05-16 Thread Belgaumkar, Vinay



On 4/18/2023 11:17 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

Sometimes, the only effective way to debug an issue is to dump all the
interesting information at the point of failure. So add support for
doing that.

v2: Extra CONFIG wrapping (review feedback from Rodrigo)

Signed-off-by: John Harrison 


series LGTM,

Reviewed-by: Vinay Belgaumkar 




John Harrison (2):
   drm/i915: Dump error capture to kernel log
   drm/i915/guc: Dump error capture to dmesg on CTB error

  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
  drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++
  drivers/gpu/drm/i915/i915_gpu_error.h |  10 ++
  4 files changed, 201 insertions(+)



Re: [PATCH] drm/i915/guc/slpc: Disable rps_boost debugfs

2023-05-15 Thread Belgaumkar, Vinay



On 5/12/2023 5:39 PM, Dixit, Ashutosh wrote:

On Fri, 12 May 2023 16:56:03 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


rps_boost debugfs shows host turbo related info. This is not valid
when SLPC is enabled.

A couple of thoughts about this. It appears people are know only about
rps_boost_info and don't know about guc_slpc_info? So:

a. Instead of hiding the rps_boost_info file do we need to print there
saying "SLPC is enabled, go look at guc_slpc_info"?
rps_boost_info has an eval() function which disables the interface when 
RPS is OFF. This is indeed the case here, so shouldn't we just follow 
that instead of trying to link the two?


b. Or, even just call guc_slpc_info_show from rps_boost_show (so the two
files will show the same SLPC information)?


slpc_info has a lot of other info like the SLPC state, not sure that 
matches up with the rps_boost_info name.


Thanks,

Vinay.



Ashutosh



guc_slpc_info already shows the number of boosts.  Add num_waiters there
as well and disable rps_boost when SLPC is enabled.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7632
Signed-off-by: Vinay Belgaumkar 


Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Provide sysfs for efficient freq

2023-04-18 Thread Belgaumkar, Vinay



On 4/17/2023 6:39 PM, Andi Shyti wrote:

Hi Vinay,

Looks good, just few minor comments below,

[...]


@@ -267,13 +267,11 @@ static int run_test(struct intel_gt *gt, int test_type)
}
  
  	/*

-* Set min frequency to RPn so that we can test the whole
-* range of RPn-RP0. This also turns off efficient freq
-* usage and makes results more predictable.
+* Turn off efficient freq so RPn/RP0 ranges are obeyed
 */
-   err = slpc_set_min_freq(slpc, slpc->min_freq);
+   err = intel_guc_slpc_set_ignore_eff_freq(slpc, true);
if (err) {
-   pr_err("Unable to update min freq!");
+   pr_err("Unable to turn off efficient freq!");

drm_err()? or gt_err()? As we are here we can use a proper
printing.

How is this change related to the scope of this patch?
The selftest was relying on setting min freq < RP1 to disable efficient 
freq, now that we have an interface, the test should use that (former 
method will not work). Should this be a separate patch?



return err;
}
  
@@ -358,9 +356,10 @@ static int run_test(struct intel_gt *gt, int test_type)

break;
}
  
-	/* Restore min/max frequencies */

-   slpc_set_max_freq(slpc, slpc_max_freq);
+   /* Restore min/max frequencies and efficient flag */
slpc_set_min_freq(slpc, slpc_min_freq);
+   slpc_set_max_freq(slpc, slpc_max_freq);
+   intel_guc_slpc_set_ignore_eff_freq(slpc, false);

mmhhh... do we care here about the return value?

I guess we should, will add.


  
  	if (igt_flush_test(gt->i915))

err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 026d73855f36..b1b70ee3001b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -277,6 +277,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
  
  	slpc->max_freq_softlimit = 0;

slpc->min_freq_softlimit = 0;
+   slpc->ignore_eff_freq = false;
slpc->min_is_rpmax = false;
  
  	slpc->boost_freq = 0;

@@ -457,6 +458,31 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc 
*slpc, u32 *val)
return ret;
  }
  
+int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)

+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret = 0;

no need to initialize ret here.

ok.



+
+   mutex_lock(>lock);
+   wakeref = intel_runtime_pm_get(>runtime_pm);
+
+   ret = slpc_set_param(slpc,
+SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+val);
+   if (ret) {
+   guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient 
freq(%d): %pe\n",
+   val, ERR_PTR(ret));
+   goto out;
+   }
+
+   slpc->ignore_eff_freq = val;

nit that you can ignore: if you put this under else and save
brackets and a goto.


ok.

Thanks,

Vinay.



Andi


Re: [Intel-gfx] [PATCH v3] drm/i915/guc/slpc: Provide sysfs for efficient freq

2023-04-17 Thread Belgaumkar, Vinay



On 4/14/2023 4:49 PM, Dixit, Ashutosh wrote:

On Fri, 14 Apr 2023 15:34:15 -0700, Vinay Belgaumkar wrote:

@@ -457,6 +458,34 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc 
*slpc, u32 *val)
return ret;
  }

+int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret = 0;
+
+   /* Need a lock now since waitboost can be modifying min as well */

Delete comment.

ok.

+   mutex_lock(>lock);

Actually, don't need the lock itself now so delete the lock.

Or, maybe the lock prevents the race if userspace writes to the sysfs when
GuC reset is going on so let's retain the lock. But the comment is wrong.

yup, ok.



+   wakeref = intel_runtime_pm_get(>runtime_pm);
+
+   /* Ignore efficient freq if lower min freq is requested */

Delete comment, it's wrong.

ok.



+   ret = slpc_set_param(slpc,
+SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+val);
+   if (ret) {
+   guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient 
freq(%d): %pe\n",
+   val, ERR_PTR(ret));
+   goto out;
+   }
+
+   slpc->ignore_eff_freq = val;
+

This extra line can also be deleted.

ok.



+out:
+   intel_runtime_pm_put(>runtime_pm, wakeref);
+   mutex_unlock(>lock);
+   return ret;
+}
+
  /**
   * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
   * @slpc: pointer to intel_guc_slpc.
@@ -482,16 +511,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
mutex_lock(>lock);
wakeref = intel_runtime_pm_get(>runtime_pm);

-   /* Ignore efficient freq if lower min freq is requested */
-   ret = slpc_set_param(slpc,
-SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
-val < slpc->rp1_freq);
-   if (ret) {
-   guc_probe_error(slpc_to_guc(slpc), "Failed to toggle efficient freq: 
%pe\n",
-   ERR_PTR(ret));
-   goto out;
-   }
-

Great, thanks!

After taking care of the above, and seems there are also a couple of
checkpatch errors, this is:

Reviewed-by: Ashutosh Dixit 


Thanks,

Vinay.



Re: [PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-03-24 Thread Belgaumkar, Vinay



On 3/24/2023 4:31 PM, Dixit, Ashutosh wrote:

On Fri, 24 Mar 2023 11:15:02 -0700, Belgaumkar, Vinay wrote:
Hi Vinay,

Thanks for the review. Comments inline below.
Sorry about asking the same questions all over again :) Didn't look at 
previous versions.



On 3/15/2023 8:59 PM, Ashutosh Dixit wrote:

On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v3 label missing in subject.

v2:
   - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
   - Add hwm_power_max_restore to error return code path

v3 (Jani N):
   - Add/remove explanatory comments
   - Function renames
   - Type corrections
   - Locking annotation

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
   drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++
   drivers/gpu/drm/i915/i915_hwmon.c | 39 +++
   drivers/gpu/drm/i915/i915_hwmon.h |  7 +
   3 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..aa8e35a5636a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
   #include "intel_uc.h"
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 static const struct intel_uc_ops uc_ops_off;
   static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = >guc;
struct intel_huc *huc = >huc;
int ret, attempts;
+   bool pl1en;

Init to 'false' here

See next comment.




GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
   +/* Disable a potentially low PL1 power limit to allow freq to be
raised */
+   i915_hwmon_power_max_disable(gt->i915, );
+
intel_rps_raise_unslice(_to_gt(uc)->rps);
while (attempts--) {
@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(_to_gt(uc)->rps);
}
   +i915_hwmon_power_max_restore(gt->i915, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
   @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc)
/* Return GT back to RPn */
intel_rps_lower_unslice(_to_gt(uc)->rps);
   +i915_hwmon_power_max_restore(gt->i915, pl1en);

if (pl1en)

     i915_hwmon_power_max_enable().

IMO it's better not to have checks in the main __uc_init_hw() function (if
we do this we'll need to add 2 checks in __uc_init_hw()). If you really
want we could do something like this inside
i915_hwmon_power_max_disable/i915_hwmon_power_max_restore. But for now I
am not making any changes.

ok.


(I can send a patch with the changes if you want to take a look but IMO it
will add more logic/code but without real benefits (it will save a rmw if
the limit was already disabled, but IMO this code is called so infrequently
(only during GuC resets) as to not have any significant impact)).


+
__uc_sanitize(uc);
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc1..769b5bda4d53f 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
   }
   +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool
*old)

Shouldn't we call this i915_hwmon_package_pl1_disable()?

I did think of using "pl1" in the function name but then decided to retain
"power_max" because other hwmon functions for PL1 limit also use
"power_max" (hwm_power_max_read/hwm_power_max_write) and currently
"hwmon_power_max" is mapped to the PL1 limit. So "power_max" is used to
show that all these functions deal with the PL1 power limit.

There is a comment in __uc_init_hw() explaining "power_max" means the PL1
power limit.

ok.



+   __acquires(i915->hwmon->hwmon_lock)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   /* Take mu

Re: [PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-03-24 Thread Belgaumkar, Vinay



On 3/15/2023 8:59 PM, Ashutosh Dixit wrote:

On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v3 label missing in subject.


v2:
  - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
  - Add hwm_power_max_restore to error return code path

v3 (Jani N):
  - Add/remove explanatory comments
  - Function renames
  - Type corrections
  - Locking annotation

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++
  drivers/gpu/drm/i915/i915_hwmon.c | 39 +++
  drivers/gpu/drm/i915/i915_hwmon.h |  7 +
  3 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..aa8e35a5636a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
  #include "intel_uc.h"
  
  #include "i915_drv.h"

+#include "i915_hwmon.h"
  
  static const struct intel_uc_ops uc_ops_off;

  static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
	struct intel_guc *guc = &uc->guc;
	struct intel_huc *huc = &uc->huc;
int ret, attempts;
+   bool pl1en;


Init to 'false' here


  
  	GEM_BUG_ON(!intel_uc_supports_guc(uc));

GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
  
+	/* Disable a potentially low PL1 power limit to allow freq to be raised */

+   i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
	intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
  
  	while (attempts--) {

@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(_to_gt(uc)->rps);
}
  
+	i915_hwmon_power_max_restore(gt->i915, pl1en);

+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
  
@@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc)

/* Return GT back to RPn */
intel_rps_lower_unslice(_to_gt(uc)->rps);
  
+	i915_hwmon_power_max_restore(gt->i915, pl1en);


if (pl1en)

    i915_hwmon_power_max_enable().


+
__uc_sanitize(uc);
  
  	if (!ret) {

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc1..769b5bda4d53f 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
  }
  
+void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old)

Shouldn't we call this i915_hwmon_package_pl1_disable()?

+   __acquires(i915->hwmon->hwmon_lock)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   /* Take mutex to prevent concurrent hwm_power_max_write */
+   mutex_lock(&hwmon->hwmon_lock);
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   r = intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);

Most of this code (lock and rmw parts) is already inside static void
hwm_locked_with_pm_intel_uncore_rmw() , can we reuse that here?

+
+   *old = !!(r & PKG_PWR_LIM_1_EN);
+}
+
+void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
+   __releases(i915->hwmon->hwmon_lock)
We can just call this i915_hwmon_power_max_enable() and call whenever 
the old value was actually enabled. That way, we have proper mirror 
functions.

+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   intel_wakeref_t wakeref;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN,
+old ? PKG_PWR_LIM_1_EN : 0);


3rd param should be 0 here, else we will end up clearing other bits.

Thanks,

Vinay.


+
+   mutex_unlock(&hwmon->hwmon_lock);
+}
+
  static umode_t
  hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
  {
diff --git 

Re: [PATCH 3/3] drm/i915/pmu: Use common freq functions with sysfs

2023-03-07 Thread Belgaumkar, Vinay



On 3/7/2023 9:33 PM, Ashutosh Dixit wrote:

Using common freq functions with sysfs in PMU (but without taking
forcewake) solves the following issues (a) missing support for MTL (b)


For the requested_freq, we read it only if actual_freq is zero below 
(meaning, GT is in C6). So then what is the point of reading it without 
a force wake? It will also be zero, correct?


Thanks,

Vinay.


missing support for older generation (prior to Gen6) (c) missing support
for slpc when freq sampling has to fall back to requested freq. It also
makes the PMU code future proof where sometimes code has been updated for
sysfs and PMU has been missed.

Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 10 --
  drivers/gpu/drm/i915/gt/intel_rps.h |  1 -
  drivers/gpu/drm/i915/i915_pmu.c | 10 --
  3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 49df31927c0e..b03bfbe7ee23 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
  }
  
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)

-{
-   struct drm_i915_private *i915 = rps_to_i915(rps);
-   i915_reg_t rpstat;
-
-   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
-   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
  u32 intel_rps_read_rpstat(struct intel_rps *rps)
  {
struct drm_i915_private *i915 = rps_to_i915(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index a990f985ab23..60ae27679011 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -53,7 +53,6 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
  u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
  u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
  u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
  void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps 
*caps);
  void intel_rps_raise_unslice(struct intel_rps *rps);
  void intel_rps_lower_unslice(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a76c5ce9513d..1a4c9fed257c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -392,14 +392,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * case we assume the system is running at the intended
 * frequency. Fortunately, the read should rarely fail!
 */
-   val = intel_rps_read_rpstat_fw(rps);
-   if (val)
-   val = intel_rps_get_cagf(rps, val);
-   else
-   val = rps->cur_freq;
+   val = intel_rps_read_actual_frequency_fw(rps);
+   if (!val)
+   val = intel_rps_get_requested_frequency_fw(rps),
  
  		add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT],

-   intel_gpu_freq(rps, val), period_ns / 1000);
+   val, period_ns / 1000);
}
  
  	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {


Re: [Intel-gfx] [PATCH] drm/i915/gsc: Fix the Driver-FLR completion

2023-02-22 Thread Belgaumkar, Vinay



On 2/22/2023 1:01 PM, Alan Previn wrote:

The Driver-FLR flow may inadvertently exit early before the full
completion of the re-init of the internal HW state if we only poll
GU_DEBUG Bit31 (polling for it to toggle from 0 -> 1). Instead
we need a two-step completion wait-for-completion flow that also
involves GU_CNTL. See the patch and new code comments for detail.
This is new direction from HW architecture folks.

v2: - Add error message for the teardown timeout (Anshuman)
- Don't duplicate code in comments (Jani)


LGTM,

Tested-by: Vinay Belgaumkar 



Signed-off-by: Alan Previn 
Fixes: 5a44fcd73498 ("drm/i915/gsc: Do a driver-FLR on unload if GSC was 
loaded")
---
  drivers/gpu/drm/i915/intel_uncore.c | 13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index f018da7ebaac..f3c46352db89 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2749,14 +2749,25 @@ static void driver_initiated_flr(struct intel_uncore 
*uncore)
/* Trigger the actual Driver-FLR */
intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
  
+	/* Wait for hardware teardown to complete */

+   ret = intel_wait_for_register_fw(uncore, GU_CNTL,
+DRIVERFLR_STATUS, 0,
+flr_timeout_ms);
+   if (ret) {
+   drm_err(>drm, "Driver-FLR-teardown wait completion failed! 
%d\n", ret);
+   return;
+   }
+
+   /* Wait for hardware/firmware re-init to complete */
ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
 flr_timeout_ms);
if (ret) {
-   drm_err(>drm, "wait for Driver-FLR completion failed! 
%d\n", ret);
+   drm_err(>drm, "Driver-FLR-reinit wait completion failed! 
%d\n", ret);
return;
}
  
+	/* Clear sticky completion status */

intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
  }
  


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0

2023-01-16 Thread Belgaumkar, Vinay



On 1/16/2023 10:58 AM, Andi Shyti wrote:

Hi,

On Thu, Jan 12, 2023 at 08:48:11PM -0800, Belgaumkar, Vinay wrote:

On 1/12/2023 8:37 PM, Dixit, Ashutosh wrote:

On Thu, 12 Jan 2023 20:26:34 -0800, Belgaumkar, Vinay wrote:

I think the ABI was changed by the patch mentioned in the commit
(a8a4f0467d70).

The ABI was originally changed in 80cf8af17af04 and 56a709cf77468.

In theory the ABI has never changed, we just needed to agree once
and for all what to do when reading the upper level interface.
There has never been a previous multitile specification before
this change.

There have been long and exhaustive discussions on what to do and
the decision is that in some cases we show the average, in others
the maximum. Never the GT0, though.


Yes, you are right. @Andi, did we have a plan to update the IGT tests that
use these interfaces to properly refer to the per GT entries as well? They
now receive average values instead of absolute, hence will fail on a
multi-GT device.

I don't know what's the plan for igt's.

Which tests are failing? I think we shouldn't be using the upper
level interfaces at all in IGT's. Previously there has been an
error printed on dmesg when this was happening. The error has
been removed in order to set the ABI as agreed above.


Tests like perf_pmu and gem_ctx_freq will fail as they read upper level 
sysfs entries and expect them to change as per the test. I think this 
includes all of the tests that read RC6 or Turbo related sysfs entries 
for that matter.


Thanks,

Vinay.



Andi


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0

2023-01-12 Thread Belgaumkar, Vinay



On 1/12/2023 8:37 PM, Dixit, Ashutosh wrote:

On Thu, 12 Jan 2023 20:26:34 -0800, Belgaumkar, Vinay wrote:

I think the ABI was changed by the patch mentioned in the commit
(a8a4f0467d70).

The ABI was originally changed in 80cf8af17af04 and 56a709cf77468.


Yes, you are right. @Andi, did we have a plan to update the IGT tests 
that use these interfaces to properly refer to the per GT entries as 
well? They now receive average values instead of absolute, hence will 
fail on a multi-GT device.


Thanks,

Vinay.



Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0

2023-01-12 Thread Belgaumkar, Vinay



On 1/12/2023 7:15 PM, Dixit, Ashutosh wrote:

On Thu, 12 Jan 2023 18:27:52 -0800, Vinay Belgaumkar wrote:

Reading current root sysfs entries gives a min/max of all
GTs. Updating this so we return default (GT0) values when root
level sysfs entries are accessed, instead of min/max for the card.
Tests that are not multi GT capable will read incorrect sysfs
values without this change on multi-GT platforms like MTL.

Fixes: a8a4f0467d70 ("drm/i915: Fix CFI violations in gt_sysfs")

We seem to be proposing to change the previous sysfs ABI with this patch?
But even then it doesn't seem correct to use gt0 values for device level
sysfs. Actually I received the following comment about using max freq
across gt's for device level freq's (gt_act_freq_mhz etc.) from one of our
users:


I think the ABI was changed by the patch mentioned in the commit 
(a8a4f0467d70). If I am not mistaken, original behavior was to return 
the GT0 values (I will double check this).


IMO, if that patch changed the behavior, it should have been accompanied 
with patches that update all the tests to use the proper per GT sysfs as 
well.


Thanks,

Vinay.



-
On Sun, 06 Nov 2022 08:54:04 -0800, Lawson, Lowren H wrote:

Why show maximum? Wouldn’t average be more accurate to the user experience?

As a user, I expect the ‘card’ frequency to be relatively accurate to the
entire card. If I see 1.6GHz, but the card is behaving as if it’s running a
1.0 & 1.6GHz on the different compute tiles, I’m going to see a massive
decrease in compute workload performance while at ‘maximum’ frequency.
-

So I am not sure why max/min were previously chosen. Why not the average?

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH v3 1/1] drm/i915/pxp: Use drm_dbg if arb session failed due to fw version

2023-01-11 Thread Belgaumkar, Vinay



On 12/21/2022 9:49 AM, Alan Previn wrote:

If PXP arb-session is being attempted on older hardware SKUs or
on hardware with older, unsupported, firmware versions, then don't
report the failure with a drm_error. Instead, look specifically for
the API-version error reply and drm_dbg that reply. In this case, the
user-space will eventually get a -ENODEV for the protected context
creation which is the correct behavior and we don't create unnecessary
drm_error's in our dmesg (for what is unsupported platforms).


LGTM. Is there a link to where these pxp status codes are documented?

Reviewed-by: Vinay Belgaumkar 



Changes from prior revs:
v2 : - remove unnecessary newline. (Jani)
v1 : - print incorrect version from input packet, not output.

Signed-off-by: Alan Previn 
---
  drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h | 1 +
  drivers/gpu/drm/i915/pxp/intel_pxp_tee.c   | 4 
  2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h 
b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h
index c2f23394f9b8..aaa8187a0afb 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h
@@ -17,6 +17,7 @@
   */
  enum pxp_status {
PXP_STATUS_SUCCESS = 0x0,
+   PXP_STATUS_ERROR_API_VERSION = 0x1002,
PXP_STATUS_OP_NOT_PERMITTED = 0x4013
  };
  
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c

index d50354bfb993..73aa8015f828 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
@@ -298,6 +298,10 @@ int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp 
*pxp,
  
  	if (ret)

drm_err(>drm, "Failed to send tee msg ret=[%d]\n", ret);
+   else if (msg_out.header.status == PXP_STATUS_ERROR_API_VERSION)
+   drm_dbg(>drm, "PXP firmware version unsupported, requested: 
"
+   "CMD-ID-[0x%08x] on API-Ver-[0x%08x]\n",
+   msg_in.header.command_id, msg_in.header.api_version);
else if (msg_out.header.status != 0x0)
drm_warn(>drm, "PXP firmware failed arb session init request 
ret=[0x%08x]\n",
 msg_out.header.status);

base-commit: cc44a1e87ea6b788868878295119398966f98a81


Re: [PATCH 1/1] drm/i915/mtl: Enable Idle Messaging for GSC CS

2022-11-16 Thread Belgaumkar, Vinay



On 11/15/2022 5:44 AM, Badal Nilawar wrote:

From: Vinay Belgaumkar 

By default idle messaging is disabled for GSC CS, so to unblock RC6
entry on media tile idle messaging needs to be enabled.

v2:
  - Fix review comments (Vinay)
  - Set GSC idle hysteresis to 5 us (Badal)

Bspec: 71496

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Badal Nilawar 
---
  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 18 ++
  drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  4 
  2 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b0a4a2dbe3ee..5522885b2db0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -15,6 +15,22 @@
  #include "intel_rc6.h"
  #include "intel_ring.h"
  #include "shmem_utils.h"
+#include "intel_gt_regs.h"
+
+static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
+{
+   struct drm_i915_private *i915 = engine->i915;
+
+   if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+   intel_uncore_write(engine->gt->uncore,
+  RC_PSMI_CTRL_GSCCS,
+  _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
+   /* 5 us hysterisis */
+   intel_uncore_write(engine->gt->uncore,
+  PWRCTX_MAXCNT_GSCCS,
+  0xA);
+   }
+}
  
  static void dbg_poison_ce(struct intel_context *ce)

  {
@@ -275,6 +291,8 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
  
  	intel_wakeref_init(>wakeref, rpm, _ops);

intel_engine_init_heartbeat(engine);
+
+   intel_gsc_idle_msg_enable(engine);
  }
  
  /**

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 07031e03f80c..20472eb15364 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -913,6 +913,10 @@
  #define  MSG_IDLE_FW_MASK REG_GENMASK(13, 9)
  #define  MSG_IDLE_FW_SHIFT9
  
+#define	RC_PSMI_CTRL_GSCCS	_MMIO(0x11a050)


Alignment still seems off? Other than that,

Reviewed-by: Vinay Belgaumkar 


+#define  IDLE_MSG_DISABLE  BIT(0)
+#define PWRCTX_MAXCNT_GSCCS_MMIO(0x11a054)
+
  #define FORCEWAKE_MEDIA_GEN9  _MMIO(0xa270)
  #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
  


Re: [PATCH 2/2] drm/i915/mtl: Enable Idle Messaging for GSC CS

2022-11-04 Thread Belgaumkar, Vinay



On 10/31/2022 8:36 PM, Badal Nilawar wrote:

From: Vinay Belgaumkar 

By defaut idle mesaging is disabled for GSC CS so to unblock RC6
entry on media tile idle messaging need to be enabled.

C6 entry instead of RC6. Also *needs*.


Bspec: 71496

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Badal Nilawar 
---
  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 12 
  drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  3 +++
  2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b0a4a2dbe3ee..8d391f8fd861 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -15,6 +15,7 @@
  #include "intel_rc6.h"
  #include "intel_ring.h"
  #include "shmem_utils.h"
+#include "intel_gt_regs.h"
  
  static void dbg_poison_ce(struct intel_context *ce)

  {
@@ -271,10 +272,21 @@ static const struct intel_wakeref_ops wf_ops = {
  
  void intel_engine_init__pm(struct intel_engine_cs *engine)

  {
+   struct drm_i915_private *i915 = engine->i915;
struct intel_runtime_pm *rpm = engine->uncore->rpm;
  
  	intel_wakeref_init(>wakeref, rpm, _ops);

intel_engine_init_heartbeat(engine);
+
+   if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+   intel_uncore_write(engine->gt->uncore,
+  RC_PSMI_CTRL_GSCCS,
+  _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
+   drm_dbg(>drm,
+   "Set GSC CS Idle Reg to: 0x%x",
+   intel_uncore_read(engine->gt->uncore, 
RC_PSMI_CTRL_GSCCS));

Do we need the debug print here?

+   }
+
  }
  
  /**

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f4624262dc81..176902a9f2a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -908,6 +908,9 @@
  #define  MSG_IDLE_FW_MASK REG_GENMASK(13, 9)
  #define  MSG_IDLE_FW_SHIFT9
  
+#define	RC_PSMI_CTRL_GSCCS	_MMIO(0x11a050)

+#define IDLE_MSG_DISABLE   BIT(0)


Is the alignment off?

Thanks,

Vinay.


+
  #define FORCEWAKE_MEDIA_GEN9  _MMIO(0xa270)
  #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
  


Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Use platform limits for min/max frequency

2022-10-24 Thread Belgaumkar, Vinay



On 10/21/2022 10:26 PM, Dixit, Ashutosh wrote:

On Fri, 21 Oct 2022 18:38:57 -0700, Belgaumkar, Vinay wrote:

On 10/20/2022 3:57 PM, Dixit, Ashutosh wrote:

On Tue, 18 Oct 2022 11:30:31 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 4c6e9257e593..e42bc215e54d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type)
enum intel_engine_id id;
struct igt_spinner spin;
u32 slpc_min_freq, slpc_max_freq;
+   u32 saved_min_freq;
int err = 0;

if (!intel_uc_uses_guc_slpc(>uc))
@@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type)
return -EIO;
}

-   /*
-* FIXME: With efficient frequency enabled, GuC can request
-* frequencies higher than the SLPC max. While this is fixed
-* in GuC, we level set these tests with RPn as min.
-*/
-   err = slpc_set_min_freq(slpc, slpc->min_freq);
-   if (err)
-   return err;
+   if (slpc_min_freq == slpc_max_freq) {
+   /* Server parts will have min/max clamped to RP0 */
+   if (slpc->min_is_rpmax) {
+   err = slpc_set_min_freq(slpc, slpc->min_freq);
+   if (err) {
+   pr_err("Unable to update min freq on server 
part");
+   return err;
+   }

-   if (slpc->min_freq == slpc->rp0_freq) {
-   pr_err("Min/Max are fused to the same value\n");
-   return -EINVAL;
+   } else {
+   pr_err("Min/Max are fused to the same value\n");
+   return -EINVAL;

Sorry but I am not following this else case here. Why are we saying min/max
are fused to the same value? In this case we can't do
"slpc_set_min_freq(slpc, slpc->min_freq)" ? That is, we can't change SLPC
min freq?

This would be an error case due to a faulty part. We may come across a part
where min/max is fused to the same value.

But even then the original check is much clearer since it is actually
comparing the fused freq's:

if (slpc->min_freq == slpc->rp0_freq)

Because if min/max have been changed slpc_min_freq and slpc_max_freq are no
longer fused freq.

And also this check should be right at the top of run_test, right after if
(!intel_uc_uses_guc_slpc), rather than in the middle here (otherwise
because we are basically not doing any error rewinding so causing memory
leaks if any of the functions return error).

ok.



+   }
+   } else {
+   /*
+* FIXME: With efficient frequency enabled, GuC can request
+* frequencies higher than the SLPC max. While this is fixed
+* in GuC, we level set these tests with RPn as min.
+*/
+   err = slpc_set_min_freq(slpc, slpc->min_freq);
+   if (err)
+   return err;
}

So let's do what is suggested above and then see what remains here and if
we need all these code changes. Most likely we can just do unconditionally
what we were doing before, i.e.:

err = slpc_set_min_freq(slpc, slpc->min_freq);
if (err)
return err;


+   saved_min_freq = slpc_min_freq;
+
+   /* New temp min freq = RPn */
+   slpc_min_freq = slpc->min_freq;

Why do we need saved_min_freq? We can retain slpc_min_freq and in the check 
below:

if (max_act_freq <= slpc_min_freq)

We can just change the check to:

if (max_act_freq <= slpc->min_freq)

Looks like to have been a bug in the original code?
Not a bug, it wasn't needed while we didn't have server parts 
(slpc_min_freq would typically be slpc->min_freq on non-server parts).

+
intel_gt_pm_wait_for_idle(gt);
intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
@@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int test_type)

/* Restore min/max frequencies */
slpc_set_max_freq(slpc, slpc_max_freq);
-   slpc_set_min_freq(slpc, slpc_min_freq);
+   slpc_set_min_freq(slpc, saved_min_freq);

if (igt_flush_test(gt->i915))
err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index fdd895f73f9f..b7cdeec44bd3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)

slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
+   slpc->min_is_rpmax = false;

slpc->boost_freq = 0;
atomic_set(>num_wait

Re: [Intel-gfx] [PATCH v4] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-24 Thread Belgaumkar, Vinay



On 10/22/2022 12:22 PM, Dixit, Ashutosh wrote:

On Sat, 22 Oct 2022 10:56:03 -0700, Belgaumkar, Vinay wrote:
Hi Vinay,


diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..32e1f5dde5bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,9 +1016,15 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

+   if (slpc->min_freq_softlimit == slpc->boost_freq)
+   return;

nit but is it possible that 'slpc->min_freq_softlimit > slpc->boost_freq'
(looks possible to me from the code though we might not have intended it)?
Then we can change this to:

if (slpc->min_freq_softlimit >= slpc->boost_freq)
return;

Any comment about this? It looks clearly possible to me from the code.

So with the above change this is:

Reviewed-by: Ashutosh Dixit 


Agree.

Thanks,

Vinay.



Re: [Intel-gfx] [PATCH v4] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-22 Thread Belgaumkar, Vinay



On 10/21/2022 7:11 PM, Dixit, Ashutosh wrote:

On Fri, 21 Oct 2022 17:24:52 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if boost_freq and the min softlimit
are the same.

v2: Add the tracing back, and check requested freq
in the worker thread (Tvrtko)
v3: Check requested freq in dec_waiters as well
v4: Only check min_softlimit against boost_freq. Limit this
optimization for server parts for now.

Sorry I didn't follow. Why are we saying limit this only to server? This:

if (slpc->min_freq_softlimit == slpc->boost_freq)
return;

The condition above should work for client too if it is true? But yes it is
typically true automatically for server but not for client. Is that what
you mean?

yes. For client, min_freq_softlimit would typically be RPn.



Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..32e1f5dde5bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,9 +1016,15 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

+   if (slpc->min_freq_softlimit == slpc->boost_freq)
+   return;

nit but is it possible that 'slpc->min_freq_softlimit > slpc->boost_freq'
(looks possible to me from the code though we might not have intended it)?
Then we can change this to:

if (slpc->min_freq_softlimit >= slpc->boost_freq)
return;



+
/* Return if old value is non zero */
-   if (!atomic_fetch_inc(>num_waiters))
+   if (!atomic_fetch_inc(>num_waiters)) {
+   GT_TRACE(rps_to_gt(rps), "boost 
fence:%llx:%llx\n",
+rq->fence.context, rq->fence.seqno);

Another possibility would have been to add the trace to slpc_boost_work but
this matches host turbo so I think it is fine here.


schedule_work(>boost_work);
+   }

return;
}

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Use platform limits for min/max frequency

2022-10-21 Thread Belgaumkar, Vinay



On 10/20/2022 3:57 PM, Dixit, Ashutosh wrote:

On Tue, 18 Oct 2022 11:30:31 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 4c6e9257e593..e42bc215e54d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int test_type)
enum intel_engine_id id;
struct igt_spinner spin;
u32 slpc_min_freq, slpc_max_freq;
+   u32 saved_min_freq;
int err = 0;

if (!intel_uc_uses_guc_slpc(>uc))
@@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int test_type)
return -EIO;
}

-   /*
-* FIXME: With efficient frequency enabled, GuC can request
-* frequencies higher than the SLPC max. While this is fixed
-* in GuC, we level set these tests with RPn as min.
-*/
-   err = slpc_set_min_freq(slpc, slpc->min_freq);
-   if (err)
-   return err;
+   if (slpc_min_freq == slpc_max_freq) {
+   /* Server parts will have min/max clamped to RP0 */
+   if (slpc->min_is_rpmax) {
+   err = slpc_set_min_freq(slpc, slpc->min_freq);
+   if (err) {
+   pr_err("Unable to update min freq on server 
part");
+   return err;
+   }

-   if (slpc->min_freq == slpc->rp0_freq) {
-   pr_err("Min/Max are fused to the same value\n");
-   return -EINVAL;
+   } else {
+   pr_err("Min/Max are fused to the same value\n");
+   return -EINVAL;

Sorry but I am not following this else case here. Why are we saying min/max
are fused to the same value? In this case we can't do
"slpc_set_min_freq(slpc, slpc->min_freq)" ? That is, we can't change SLPC
min freq?
This would be an error case due to a faulty part. We may come across a 
part where min/max is fused to the same value.



diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index fdd895f73f9f..b7cdeec44bd3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)

slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
+   slpc->min_is_rpmax = false;

slpc->boost_freq = 0;
atomic_set(>num_waiters, 0);
@@ -588,6 +589,32 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
  }

+static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+   int slpc_min_freq;
+
+   if (intel_guc_slpc_get_min_freq(slpc, _min_freq))
+   return false;

I am wondering what happens if the above fails on server? Should we return
true or false on server and what are the consequences of returning false on
server?

Any case I think we should at least put a drm_err or something here just in
case this ever fails so we'll know something weird happened.


Makes sense.

Thanks,

Vinay.




+
+   if (slpc_min_freq == SLPC_MAX_FREQ_MHZ)
+   return true;
+   else
+   return false;
+}
+
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+   /* For server parts, SLPC min will be at RPMax.
+* Use min softlimit to clamp it to RP0 instead.
+*/
+   if (is_slpc_min_freq_rpmax(slpc) &&
+   !slpc->min_freq_softlimit) {
+   slpc->min_is_rpmax = true;
+   slpc->min_freq_softlimit = slpc->rp0_freq;
+   (slpc_to_gt(slpc))->defaults.min_freq = 
slpc->min_freq_softlimit;
+   }
+}
+
  static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
  {
/* Force SLPC to used platform rp0 */
@@ -647,6 +674,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)

slpc_get_rp_values(slpc);

+   /* Handle the case where min=max=RPmax */
+   update_server_min_softlimit(slpc);
+
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 82a98f78f96c..11975a31c9d0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -9,6 +9,8 @@
  #include "intel_guc_submission.h"
  #include "intel_guc_slpc_types.h"

+#define SLPC_MAX_FREQ_MHZ 4250

This seems to really be a value (255 converted to freq), so it seems ok to
interpret in MHz.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-21 Thread Belgaumkar, Vinay



On 10/21/2022 11:40 AM, Dixit, Ashutosh wrote:

On Fri, 21 Oct 2022 11:24:42 -0700, Belgaumkar, Vinay wrote:


On 10/20/2022 4:36 PM, Dixit, Ashutosh wrote:

On Thu, 20 Oct 2022 13:16:00 -0700, Belgaumkar, Vinay wrote:

On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote:

On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if GuC is already requesting the
same.

But how are we sure that the freq will remain at RP0 in the future (when
the waiting request or any requests which are ahead execute)?

In the current waitboost implementation, set_param is sent to GuC ahead of
the waiting request to ensure that the freq would be max when this waiting
request executed on the GPU and the freq is kept at max till this request
retires (considering just one waiting request). How can we ensure this if
we don't send the waitboost set_param to GuC?

There is no way to guarantee the frequency will remain at RP0 till the
request retires. As a theoretical example, let's say the request boosted
freq to RP0, but a user changed min freq using sysfs immediately after.

That would be a bug. If waitboost is in progress and in the middle user
changed min freq, I would expect the freq to revert to the new min only
after the waitboost phase was over.

The problem here is that GuC is unaware of this "boosting"
phenomenon. Setting the min_freq_softlimit as well to boost when we send a
boost request might help with this issue.


In any case, I am not referring to this case. Since FW controls the freq
there is nothing preventing FW to change the freq unless we raise min to
max which is what waitboost does.

Ok, so maybe the solution here is to check if min_softlimit is already at
boost freq, as it tracks the min freq changes. That should take care of
server parts automatically as well.

Correct, yes that would be the right way to do it.


Actually, rethinking, it's not going to work for client GPUs. We cannot 
clobber the min_softlimit as the user may have set it. So, I'll just 
make this change to optimize it for server parts for now.


Thanks,

Vinay.



Thanks.
--
Ashutosh


Waitboost is done by a pending request to "hurry" the current requests. If
GT is already at boost frequency, that purpose is served.

FW can bring the freq down later before the waiting request is scheduled.

Also, host algorithm already has this optimization as well.

Host turbo is different from SLPC. Host turbo controls the freq algorithm
so it knows freq will not come down till it itself brings the freq
down. Unlike SLPC where FW is controlling the freq. Therefore host turbo
doesn't ever need to do a MMIO read but only needs to refer to its own
state (rps->cur_freq etc.).

True. Host algorithm has a periodic timer where it updates frequency. Here,
it checks num_waiters and sets client_boost every time that is non-zero.

I had assumed we'll do this optimization for server parts where min is
already RP0 in which case we can completely disable waitboost. But this
patch is something else.

Hopefully the softlimit changes above will help with client and server.

Thanks,

Vinay.


Thanks.
--
Ashutosh


v2: Add the tracing back, and check requested freq
in the worker thread (Tvrtko)
v3: Check requested freq in dec_waiters as well

Signed-off-by: Vinay Belgaumkar 
---
drivers/gpu/drm/i915/gt/intel_rps.c |  3 +++
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++---
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..18b75cf08d1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

+   GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+rq->fence.context, rq->fence.seqno);
+
/* Return if old value is non zero */
if (!atomic_fetch_inc(>num_waiters))
schedule_work(>boost_work);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b7cdeec44bd3..9dbdbab1515a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc 
*slpc, u32 freq)
static void slpc_boost_work(struct work_struct *work)
{
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+   struct intel_rps *rps = _to_gt(slpc)->rps;
int err;

/*
 

Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-21 Thread Belgaumkar, Vinay



On 10/20/2022 4:36 PM, Dixit, Ashutosh wrote:

On Thu, 20 Oct 2022 13:16:00 -0700, Belgaumkar, Vinay wrote:

On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote:

On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if GuC is already requesting the
same.

But how are we sure that the freq will remain at RP0 in the future (when
the waiting request or any requests which are ahead execute)?

In the current waitboost implementation, set_param is sent to GuC ahead of
the waiting request to ensure that the freq would be max when this waiting
request executed on the GPU and the freq is kept at max till this request
retires (considering just one waiting request). How can we ensure this if
we don't send the waitboost set_param to GuC?

There is no way to guarantee the frequency will remain at RP0 till the
request retires. As a theoretical example, let's say the request boosted
freq to RP0, but a user changed min freq using sysfs immediately after.

That would be a bug. If waitboost is in progress and in the middle user
changed min freq, I would expect the freq to revert to the new min only
after the waitboost phase was over.


The problem here is that GuC is unaware of this "boosting" phenomenon. 
Setting the min_freq_softlimit as well to boost when we send a boost 
request might help with this issue.




In any case, I am not referring to this case. Since FW controls the freq
there is nothing preventing FW to change the freq unless we raise min to
max which is what waitboost does.
Ok, so maybe the solution here is to check if min_softlimit is already 
at boost freq, as it tracks the min freq changes. That should take care 
of server parts automatically as well.



Waitboost is done by a pending request to "hurry" the current requests. If
GT is already at boost frequency, that purpose is served.

FW can bring the freq down later before the waiting request is scheduled.

Also, host algorithm already has this optimization as well.

Host turbo is different from SLPC. Host turbo controls the freq algorithm
so it knows freq will not come down till it itself brings the freq
down. Unlike SLPC where FW is controlling the freq. Therefore host turbo
doesn't ever need to do a MMIO read but only needs to refer to its own
state (rps->cur_freq etc.).
True. Host algorithm has a periodic timer where it updates frequency. 
Here, it checks num_waiters and sets client_boost every time that is 
non-zero.

I had assumed we'll do this optimization for server parts where min is
already RP0 in which case we can completely disable waitboost. But this
patch is something else.


Hopefully the softlimit changes above will help with client and server.

Thanks,

Vinay.


Thanks.
--
Ashutosh


v2: Add the tracing back, and check requested freq
in the worker thread (Tvrtko)
v3: Check requested freq in dec_waiters as well

Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/intel_rps.c |  3 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++---
   2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..18b75cf08d1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

+   GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+rq->fence.context, rq->fence.seqno);
+
/* Return if old value is non zero */
if (!atomic_fetch_inc(>num_waiters))
schedule_work(>boost_work);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b7cdeec44bd3..9dbdbab1515a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc 
*slpc, u32 freq)
   static void slpc_boost_work(struct work_struct *work)
   {
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+   struct intel_rps *rps = _to_gt(slpc)->rps;
int err;

/*
 * Raise min freq to boost. It's possible that
 * this is greater than current max. But it will
 * certainly be limited by RP0. An error setting
-* the min param is not fatal.
+* the min param is not fatal. No need to boost
+* if we are already requesting it.
 */
+   if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq)
+   return;
+

Re: [Intel-gfx] [PATCH v3] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-20 Thread Belgaumkar, Vinay



On 10/20/2022 11:33 AM, Dixit, Ashutosh wrote:

On Wed, 19 Oct 2022 17:29:44 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if GuC is already requesting the
same.

But how are we sure that the freq will remain at RP0 in the future (when
the waiting request or any requests which are ahead execute)?

In the current waitboost implementation, set_param is sent to GuC ahead of
the waiting request to ensure that the freq would be max when this waiting
request executed on the GPU and the freq is kept at max till this request
retires (considering just one waiting request). How can we ensure this if
we don't send the waitboost set_param to GuC?


There is no way to guarantee the frequency will remain at RP0 till the 
request retires. As a theoretical example, let's say the request boosted 
freq to RP0, but a user changed min freq using sysfs immediately after.


Waitboost is done by a pending request to "hurry" the current requests. 
If GT is already at boost frequency, that purpose is served. Also, host 
algorithm already has this optimization as well.


Thanks,

Vinay.



I had assumed we'll do this optimization for server parts where min is
already RP0 in which case we can completely disable waitboost. But this
patch is something else.

Thanks.
--
Ashutosh



v2: Add the tracing back, and check requested freq
in the worker thread (Tvrtko)
v3: Check requested freq in dec_waiters as well

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c |  3 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 14 +++---
  2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..18b75cf08d1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

+   GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+rq->fence.context, rq->fence.seqno);
+
/* Return if old value is non zero */
if (!atomic_fetch_inc(>num_waiters))
schedule_work(>boost_work);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b7cdeec44bd3..9dbdbab1515a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc 
*slpc, u32 freq)
  static void slpc_boost_work(struct work_struct *work)
  {
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+   struct intel_rps *rps = _to_gt(slpc)->rps;
int err;

/*
 * Raise min freq to boost. It's possible that
 * this is greater than current max. But it will
 * certainly be limited by RP0. An error setting
-* the min param is not fatal.
+* the min param is not fatal. No need to boost
+* if we are already requesting it.
 */
+   if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq)
+   return;
+
mutex_lock(>lock);
if (atomic_read(>num_waiters)) {
err = slpc_force_min_freq(slpc, slpc->boost_freq);
@@ -728,6 +733,7 @@ int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc 
*slpc, u32 val)

  void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
  {
+   struct intel_rps *rps = _to_gt(slpc)->rps;
/*
 * Return min back to the softlimit.
 * This is called during request retire,
@@ -735,8 +741,10 @@ void intel_guc_slpc_dec_waiters(struct intel_guc_slpc 
*slpc)
 * set_param fails.
 */
mutex_lock(>lock);
-   if (atomic_dec_and_test(>num_waiters))
-   slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
+   if (atomic_dec_and_test(>num_waiters)) {
+   if (intel_rps_get_requested_frequency(rps) != 
slpc->min_freq_softlimit)
+   slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
+   }
mutex_unlock(>lock);
  }

--
2.35.1



Re: [PATCH v2] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-19 Thread Belgaumkar, Vinay



On 10/19/2022 4:05 PM, Vinay Belgaumkar wrote:

Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if GuC is already requesting the
same.

v2: Add the tracing back, and check requested freq
in the worker thread (Tvrtko)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 7 ++-
  2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..18b75cf08d1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1016,6 +1016,9 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
  
+			GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",

+rq->fence.context, rq->fence.seqno);
+
/* Return if old value is non zero */
if (!atomic_fetch_inc(>num_waiters))


The issue when we move the req freq check into the slpc_work is that we 
are incrementing num_waiters. That will trigger a de-boost and result in 
a H2G. Need to check the req frequency there as well.


Thanks,

Vinay.


schedule_work(>boost_work);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b7cdeec44bd3..7ab96221be7e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -227,14 +227,19 @@ static int slpc_force_min_freq(struct intel_guc_slpc 
*slpc, u32 freq)
  static void slpc_boost_work(struct work_struct *work)
  {
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+   struct intel_rps *rps = _to_gt(slpc)->rps;
int err;
  
  	/*

 * Raise min freq to boost. It's possible that
 * this is greater than current max. But it will
 * certainly be limited by RP0. An error setting
-* the min param is not fatal.
+* the min param is not fatal. No need to boost
+* if we are already requesting it.
 */
+   if (intel_rps_get_requested_frequency(rps) == slpc->boost_freq)
+   return;
+
mutex_lock(>lock);
if (atomic_read(>num_waiters)) {
err = slpc_force_min_freq(slpc, slpc->boost_freq);


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-19 Thread Belgaumkar, Vinay



On 10/19/2022 2:12 PM, Belgaumkar, Vinay wrote:


On 10/19/2022 12:40 AM, Tvrtko Ursulin wrote:


On 18/10/2022 23:15, Vinay Belgaumkar wrote:
Waitboost (when SLPC is enabled) results in a H2G message. This can 
result
in thousands of messages during a stress test and fill up an already 
full
CTB. There is no need to request for RP0 if GuC is already 
requesting the

same.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c

index fc23c562d9b2..a20ae4fceac8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps 
*rps)

  void intel_rps_boost(struct i915_request *rq)
  {
  struct intel_guc_slpc *slpc;
+    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;
    if (i915_request_signaled(rq) || 
i915_request_has_waitboost(rq))

  return;
  +    /* If GuC is already requesting RP0, skip */
+    if (rps_uses_slpc(rps)) {
+    slpc = rps_to_slpc(rps);
+    if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq)

One correction here is this should be slpc->boost_freq.

+    return;
+    }
+


Feels a little bit like a layering violation. Wait boost reference 
counts and request markings will changed based on asynchronous state 
- a mmio read.


Also, a little below we have this:

"""
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) {
    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;

    if (rps_uses_slpc(rps)) {
    slpc = rps_to_slpc(rps);

    /* Return if old value is non zero */
    if (!atomic_fetch_inc(>num_waiters))

***>>>> Wouldn't it skip doing anything here already? <<<<***
It will skip only if boost is already happening. This patch is trying 
to prevent even that first one if possible.


    schedule_work(>boost_work);

    return;
    }

    if (atomic_fetch_inc(>num_waiters))
    return;
"""

But I wonder if this is not a layering violation already. Looks like 
one for me at the moment. And as it happens there is an ongoing debug 
of clvk slowness where I was a bit puzzled by the lack of "boost 
fence" in trace_printk logs - but now I see how that happens. Does 
not feel right to me that we lose that tracing with SLPC.
Agreed. Will add the trace to the SLPC case as well.  However, the 
question is what does that trace indicate? Even in the host case, we 
log the trace, but may skip the actual boost as the req is already 
matching boost freq. IMO, we should log the trace only when we 
actually decide to boost.
On second thoughts, that trace only tracks the boost fence, which is set 
in this case. So, might be ok to have it regardless. We count the 
num_boosts anyways if we ever wanted to know how many of those actually 
went on to boost the freq.


So in general - why the correct approach wouldn't be to solve this in 
the worker - which perhaps should fork to slpc specific branch and do 
the consolidations/skips based on mmio reads in there?


sure, I can move the mmio read to the SLPC worker thread.

Thanks,

Vinay.



Regards,

Tvrtko


  /* Serializes with i915_request_retire() */
  if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) {
-    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;
    if (rps_uses_slpc(rps)) {
  slpc = rps_to_slpc(rps);


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-19 Thread Belgaumkar, Vinay



On 10/19/2022 12:40 AM, Tvrtko Ursulin wrote:


On 18/10/2022 23:15, Vinay Belgaumkar wrote:
Waitboost (when SLPC is enabled) results in a H2G message. This can 
result
in thousands of messages during a stress test and fill up an already 
full
CTB. There is no need to request for RP0 if GuC is already requesting 
the

same.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c

index fc23c562d9b2..a20ae4fceac8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps 
*rps)

  void intel_rps_boost(struct i915_request *rq)
  {
  struct intel_guc_slpc *slpc;
+    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;
    if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
  return;
  +    /* If GuC is already requesting RP0, skip */
+    if (rps_uses_slpc(rps)) {
+    slpc = rps_to_slpc(rps);
+    if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq)

One correction here is this should be slpc->boost_freq.

+    return;
+    }
+


Feels a little bit like a layering violation. Wait boost reference 
counts and request markings will changed based on asynchronous state - 
a mmio read.


Also, a little below we have this:

"""
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) {
    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;

    if (rps_uses_slpc(rps)) {
    slpc = rps_to_slpc(rps);

    /* Return if old value is non zero */
    if (!atomic_fetch_inc(>num_waiters))

*** Wouldn't it skip doing anything here already? ***
It will skip only if boost is already happening. This patch is trying to 
prevent even that first one if possible.


    schedule_work(>boost_work);

    return;
    }

    if (atomic_fetch_inc(>num_waiters))
    return;
"""

But I wonder if this is not a layering violation already. Looks like 
one for me at the moment. And as it happens there is an ongoing debug 
of clvk slowness where I was a bit puzzled by the lack of "boost 
fence" in trace_printk logs - but now I see how that happens. Does not 
feel right to me that we lose that tracing with SLPC.
Agreed. Will add the trace to the SLPC case as well.  However, the 
question is what does that trace indicate? Even in the host case, we log 
the trace, but may skip the actual boost as the req is already matching 
boost freq. IMO, we should log the trace only when we actually decide to 
boost.


So in general - why the correct approach wouldn't be to solve this in 
the worker - which perhaps should fork to slpc specific branch and do 
the consolidations/skips based on mmio reads in there?


sure, I can move the mmio read to the SLPC worker thread.

Thanks,

Vinay.



Regards,

Tvrtko


  /* Serializes with i915_request_retire() */
  if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) {
-    struct intel_rps *rps = _ONCE(rq->engine)->gt->rps;
    if (rps_uses_slpc(rps)) {
  slpc = rps_to_slpc(rps);


Re: [PATCH v2] drm/i915/slpc: Use platform limits for min/max frequency

2022-10-13 Thread Belgaumkar, Vinay



On 10/13/2022 3:28 PM, Dixit, Ashutosh wrote:

On Thu, 13 Oct 2022 08:55:24 -0700, Vinay Belgaumkar wrote:
Hi Vinay,


GuC will set the min/max frequencies to theoretical max on
ATS-M. This will break kernel ABI, so limit min/max frequency
to RP0(platform max) instead.

Isn't what we are calling "theoretical max" or "RPmax" really just -1U
(0x)? Though I have heard this is not a max value but -1U indicates
FW default values unmodified by host SW, which would mean frequencies are
fully controlled by FW (min == max == -1U). But if this were the case I
don't know why this would be the case only for server, why doesn't FW set
these for clients too to indicate it is fully in control?
FW sets max to -1U for client products(we already pull it down to RP0). 
It additionally makes min=max for server parts.


So the question what does -1U actually represent? Is it the RPmax value or
does -1U represent "FW defaults"?

Also this concept of using -1U as "FW defaults" is present in Level0/OneAPI
(and likely in firmware) but we seem to have blocked in the i915 ABI.

I understand we may not be able to make such changes at present but this
provides some context for the review comments below.


diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index fdd895f73f9f..11613d373a49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)

slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
+   slpc->min_is_rpmax = false;

slpc->boost_freq = 0;
atomic_set(>num_waiters, 0);
@@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
  }

+static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+   int slpc_min_freq;
+
+   if (intel_guc_slpc_get_min_freq(slpc, _min_freq))
+   return false;
+
+   if (slpc_min_freq > slpc->rp0_freq)
or >=.

If what we are calling "rpmax" really -1U then why don't we just check for
-1U here?

u32 slpc_min_freq;

if (slpc_min_freq == -1U)
That'll work similarly too. Only time slpc_min_freq is greater than rp0 
is for a server part.



+   return true;
+   else
+   return false;
+}
+
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+   /* For server parts, SLPC min will be at RPMax.
+* Use min softlimit to clamp it to RP0 instead.
+*/
+   if (is_slpc_min_freq_rpmax(slpc) &&
+   !slpc->min_freq_softlimit) {
+   slpc->min_is_rpmax = true;
+   slpc->min_freq_softlimit = slpc->rp0_freq;

Isn't it safer to use a platform check such as IS_ATSM or IS_XEHPSDV (or
even #define IS_SERVER()) to set min freq to RP0 rather than this -1U value
from FW? What if -1U means "FW defaults" and FW starts setting this on
client products tomorrow?


We are not checking for -1 specifically, but only if FW has set min > 
RP0 as an indicator. Also, might be worth having IS_SERVER at some point 
if there are other places we need this info as well.




Also, we need to set gt->defaults.min_freq here.


yes, need to add that.

Thanks,

Vinay.



Thanks.
--
Ashutosh



+   }
+}
+
  static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
  {
/* Force SLPC to used platform rp0 */
@@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)

slpc_get_rp_values(slpc);

+   /* Handle the case where min=max=RPmax */
+   update_server_min_softlimit(slpc);
+
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index 73d208123528..a6ef53b04e04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -19,6 +19,9 @@ struct intel_guc_slpc {
bool supported;
bool selected;

+   /* Indicates this is a server part */
+   bool min_is_rpmax;
+
/* platform frequency limits */
u32 min_freq;
u32 rp0_freq;
--
2.35.1



Re: [PATCH] drm/i915/slpc: Use platform limits for min/max frequency

2022-10-13 Thread Belgaumkar, Vinay



On 10/13/2022 8:14 AM, Das, Nirmoy wrote:


On 10/12/2022 8:26 PM, Vinay Belgaumkar wrote:

GuC will set the min/max frequencies to theoretical max on
ATS-M. This will break kernel ABI, so limit min/max frequency
to RP0(platform max) instead.

Also modify the SLPC selftest to update the min frequency
when we have a server part so that we can iterate between
platform min and max.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/selftest_slpc.c   | 40 +--
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 29 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |  3 ++
  3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c

index 4c6e9257e593..1f84362af737 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int 
test_type)

  enum intel_engine_id id;
  struct igt_spinner spin;
  u32 slpc_min_freq, slpc_max_freq;
+    u32 saved_min_freq;
  int err = 0;
    if (!intel_uc_uses_guc_slpc(>uc))
@@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int 
test_type)

  return -EIO;
  }
  -    /*
- * FIXME: With efficient frequency enabled, GuC can request
- * frequencies higher than the SLPC max. While this is fixed
- * in GuC, we level set these tests with RPn as min.
- */
-    err = slpc_set_min_freq(slpc, slpc->min_freq);
-    if (err)
-    return err;
-
  if (slpc->min_freq == slpc->rp0_freq) {
-    pr_err("Min/Max are fused to the same value\n");
-    return -EINVAL;
+    /* Servers will have min/max clamped to RP0 */



This should be "server parts". Tested the patch with Riana's suggested 
changes.


Acked-by: Nirmoy Das  with above changes.


Thanks, v2 sent with corrections.

Vinay.




Nirmoy


+    if (slpc->min_is_rpmax) {
+    err = slpc_set_min_freq(slpc, slpc->min_freq);
+    if (err) {
+    pr_err("Unable to update min freq on server part");
+    return err;
+    }
+
+    } else {
+    pr_err("Min/Max are fused to the same value\n");
+    return -EINVAL;
+    }
+    } else {
+    /*
+ * FIXME: With efficient frequency enabled, GuC can request
+ * frequencies higher than the SLPC max. While this is fixed
+ * in GuC, we level set these tests with RPn as min.
+ */
+    err = slpc_set_min_freq(slpc, slpc->min_freq);
+    if (err)
+    return err;
  }
  +    saved_min_freq = slpc_min_freq;
+
+    /* New temp min freq = RPn */
+    slpc_min_freq = slpc->min_freq;
+
  intel_gt_pm_wait_for_idle(gt);
  intel_gt_pm_get(gt);
  for_each_engine(engine, gt, id) {
@@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int 
test_type)

    /* Restore min/max frequencies */
  slpc_set_max_freq(slpc, slpc_max_freq);
-    slpc_set_min_freq(slpc, slpc_min_freq);
+    slpc_set_min_freq(slpc, saved_min_freq);
    if (igt_flush_test(gt->i915))
  err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index fdd895f73f9f..11613d373a49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
    slpc->max_freq_softlimit = 0;
  slpc->min_freq_softlimit = 0;
+    slpc->min_is_rpmax = false;
    slpc->boost_freq = 0;
  atomic_set(>num_waiters, 0);
@@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct 
intel_guc_slpc *slpc)

  return 0;
  }
  +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+    int slpc_min_freq;
+
+    if (intel_guc_slpc_get_min_freq(slpc, _min_freq))
+    return false;
+
+    if (slpc_min_freq > slpc->rp0_freq)
+    return true;
+    else
+    return false;
+}
+
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+    /* For server parts, SLPC min will be at RPMax.
+ * Use min softlimit to clamp it to RP0 instead.
+ */
+    if (is_slpc_min_freq_rpmax(slpc) &&
+    !slpc->min_freq_softlimit) {
+    slpc->min_is_rpmax = true;
+    slpc->min_freq_softlimit = slpc->rp0_freq;
+    }
+}
+
  static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
  {
  /* Force SLPC to used platform rp0 */
@@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc 
*slpc)

    slpc_get_rp_values(slpc);
  +    /* Handle the case where min=max=RPmax */
+    update_server_min_softlimit(slpc);
+
  /* Set SLPC max limit to RP0 */
  ret = slpc_use_fused_rp0(slpc);
  if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h

index 73d208123528..a6ef53b04e04 100644
--- 

Re: [Intel-gfx] [PATCH] drm/i915/slpc: Use platform limits for min/max frequency

2022-10-13 Thread Belgaumkar, Vinay



On 10/13/2022 4:34 AM, Tauro, Riana wrote:



On 10/12/2022 11:56 PM, Vinay Belgaumkar wrote:

GuC will set the min/max frequencies to theoretical max on
ATS-M. This will break kernel ABI, so limit min/max frequency
to RP0(platform max) instead.

Also modify the SLPC selftest to update the min frequency
when we have a server part so that we can iterate between
platform min and max.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/selftest_slpc.c   | 40 +--
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 29 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |  3 ++
  3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c

index 4c6e9257e593..1f84362af737 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -234,6 +234,7 @@ static int run_test(struct intel_gt *gt, int 
test_type)

  enum intel_engine_id id;
  struct igt_spinner spin;
  u32 slpc_min_freq, slpc_max_freq;
+    u32 saved_min_freq;
  int err = 0;
  if (!intel_uc_uses_guc_slpc(&gt->uc))
@@ -252,20 +253,35 @@ static int run_test(struct intel_gt *gt, int 
test_type)

  return -EIO;
  }
  -    /*
- * FIXME: With efficient frequency enabled, GuC can request
- * frequencies higher than the SLPC max. While this is fixed
- * in GuC, we level set these tests with RPn as min.
- */
-    err = slpc_set_min_freq(slpc, slpc->min_freq);
-    if (err)
-    return err;
-
  if (slpc->min_freq == slpc->rp0_freq) {

This has to be (slpc_min_freq == slpc_max_freq) instead of
(slpc->min_freq == slpc->rp0_freq).

Servers will have min/max softlimits clamped to RP0


Agree. will send out v2.

Thanks,

Vinay.



Thanks
Riana

-    pr_err("Min/Max are fused to the same value\n");
-    return -EINVAL;
+    /* Servers will have min/max clamped to RP0 */
+    if (slpc->min_is_rpmax) {
+    err = slpc_set_min_freq(slpc, slpc->min_freq);
+    if (err) {
+    pr_err("Unable to update min freq on server part");
+    return err;
+    }
+
+    } else {
+    pr_err("Min/Max are fused to the same value\n");
+    return -EINVAL;
+    }
+    } else {
+    /*
+ * FIXME: With efficient frequency enabled, GuC can request
+ * frequencies higher than the SLPC max. While this is fixed
+ * in GuC, we level set these tests with RPn as min.
+ */
+    err = slpc_set_min_freq(slpc, slpc->min_freq);
+    if (err)
+    return err;
  }
  +    saved_min_freq = slpc_min_freq;
+
+    /* New temp min freq = RPn */
+    slpc_min_freq = slpc->min_freq;
+
  intel_gt_pm_wait_for_idle(gt);
  intel_gt_pm_get(gt);
  for_each_engine(engine, gt, id) {
@@ -347,7 +363,7 @@ static int run_test(struct intel_gt *gt, int 
test_type)

    /* Restore min/max frequencies */
  slpc_set_max_freq(slpc, slpc_max_freq);
-    slpc_set_min_freq(slpc, slpc_min_freq);
+    slpc_set_min_freq(slpc, saved_min_freq);
    if (igt_flush_test(gt->i915))
  err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index fdd895f73f9f..11613d373a49 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -263,6 +263,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
    slpc->max_freq_softlimit = 0;
  slpc->min_freq_softlimit = 0;
+    slpc->min_is_rpmax = false;
    slpc->boost_freq = 0;
  atomic_set(&slpc->num_waiters, 0);
@@ -588,6 +589,31 @@ static int slpc_set_softlimits(struct 
intel_guc_slpc *slpc)

  return 0;
  }
  +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+    int slpc_min_freq;
+
+    if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq))
+    return false;
+
+    if (slpc_min_freq > slpc->rp0_freq)
+    return true;
+    else
+    return false;
+}
+
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+    /* For server parts, SLPC min will be at RPMax.
+ * Use min softlimit to clamp it to RP0 instead.
+ */
+    if (is_slpc_min_freq_rpmax(slpc) &&
+    !slpc->min_freq_softlimit) {
+    slpc->min_is_rpmax = true;
+    slpc->min_freq_softlimit = slpc->rp0_freq;
+    }
+}
+
  static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
  {
  /* Force SLPC to used platform rp0 */
@@ -647,6 +673,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc 
*slpc)

    slpc_get_rp_values(slpc);
  +    /* Handle the case where min=max=RPmax */
+    update_server_min_softlimit(slpc);
+
  /* Set SLPC max limit to RP0 */
  ret = slpc_use_fused_rp0(slpc);
  if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h

index 

Re: [PATCH 1/2] drm/i915: Add a wrapper for frequency debugfs

2022-10-04 Thread Belgaumkar, Vinay



On 10/4/2022 12:36 AM, Jani Nikula wrote:

On Mon, 03 Oct 2022, Vinay Belgaumkar  wrote:

Move it to the RPS source file.

The idea was that the 1st patch would be non-functional code
movement. This is still a functional change.

Or you can do the functional changes first, and then move code, as long
as you don't combine code movement with functional changes.

Yup, will move the SLPC check to the second patch as well.


Please also mark your patch revisions and note the changes. There's no
indication this series is v2.


ok.

Thanks,

Vinay.



BR,
Jani.


Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 157 +---
  drivers/gpu/drm/i915/gt/intel_rps.c   | 169 ++
  drivers/gpu/drm/i915/gt/intel_rps.h   |   3 +
  3 files changed, 173 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 9fd4d9255a97..4319d6cdafe2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, 
struct drm_printer *p)
drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
   intel_gpu_freq(rps, rps->efficient_freq));
} else if (GRAPHICS_VER(i915) >= 6) {
-   u32 rp_state_limits;
-   u32 gt_perf_status;
-   struct intel_rps_freq_caps caps;
-   u32 rpmodectl, rpinclimit, rpdeclimit;
-   u32 rpstat, cagf, reqf;
-   u32 rpcurupei, rpcurup, rpprevup;
-   u32 rpcurdownei, rpcurdown, rpprevdown;
-   u32 rpupei, rpupt, rpdownei, rpdownt;
-   u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
-
-   rp_state_limits = intel_uncore_read(uncore, 
GEN6_RP_STATE_LIMITS);
-   gen6_rps_get_freq_caps(rps, &caps);
-   if (IS_GEN9_LP(i915))
-   gt_perf_status = intel_uncore_read(uncore, 
BXT_GT_PERF_STATUS);
-   else
-   gt_perf_status = intel_uncore_read(uncore, 
GEN6_GT_PERF_STATUS);
-
-   /* RPSTAT1 is in the GT power well */
-   intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-   reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
-   if (GRAPHICS_VER(i915) >= 9) {
-   reqf >>= 23;
-   } else {
-   reqf &= ~GEN6_TURBO_DISABLE;
-   if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-   reqf >>= 24;
-   else
-   reqf >>= 25;
-   }
-   reqf = intel_gpu_freq(rps, reqf);
-
-   rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
-   rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
-   rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
-   rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
-   rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
GEN6_CURICONT_MASK;
-   rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
GEN6_CURBSYTAVG_MASK;
-   rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
GEN6_CURBSYTAVG_MASK;
-   rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & 
GEN6_CURIAVG_MASK;
-   rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & 
GEN6_CURBSYTAVG_MASK;
-   rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & 
GEN6_CURBSYTAVG_MASK;
-
-   rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
-   rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
-
-   rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
-   rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
-   cagf = intel_rps_read_actual_frequency(rps);
-
-   intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-
-   if (GRAPHICS_VER(i915) >= 11) {
-   pm_ier = intel_uncore_read(uncore, 
GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-   pm_imr = intel_uncore_read(uncore, 
GEN11_GPM_WGBOXPERF_INTR_MASK);
-   /*
-* The equivalent to the PM ISR & IIR cannot be read
-* without affecting the current state of the system
-*/
-   pm_isr = 0;
-   pm_iir = 0;
-   } else if (GRAPHICS_VER(i915) >= 8) {
-   pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
-   pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
-   pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
-   pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
- 

Re: [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency

2022-08-24 Thread Belgaumkar, Vinay



On 8/15/2022 10:32 AM, Rodrigo Vivi wrote:

On Sun, Aug 14, 2022 at 04:46:54PM -0700, Vinay Belgaumkar wrote:

Host Turbo operates at efficient frequency when GT is not idle unless
the user or workload has forced it to a higher level. Replicate the same
behavior in SLPC by allowing the algorithm to use efficient frequency.
We had disabled it during boot due to concerns that it might break
kernel ABI for min frequency. However, this is not the case since
SLPC will still abide by the (min,max) range limits.

With this change, min freq will be at efficient frequency level at init
instead of fused min (RPn). If user chooses to reduce min freq below the
efficient freq, we will turn off usage of efficient frequency and honor
the user request. When a higher value is written, it will get toggled
back again.

The patch also corrects the register which needs to be read for obtaining
the correct efficient frequency for Gen9+.

We see much better perf numbers with benchmarks like glmark2 with
efficient frequency usage enabled as expected.

BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/5468

Cc: Rodrigo Vivi 

First of all sorry for looking to the old patch first... I was delayed in my 
inbox flow.


Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c |  3 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 66 +++--
  drivers/gpu/drm/i915/intel_mchbar_regs.h|  3 +
  3 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index c7d381ad90cf..281a086fc265 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1108,6 +1108,9 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct 
intel_rps_freq_caps *c
} else {
caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+   caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
+  
intel_uncore_read(to_gt(i915)->uncore,
+  GEN10_FREQ_INFO_REC));

This register is only gen10+ while the func is gen6+.
either we handle the platform properly or we create a new rpe_freq tracker 
somewhere
and if that's available we use this rpe, otherwise we use the hw fused rp1 
which is a good
enough, but it is not the actual one resolved by pcode, like this new RPe one.

sure.



caps->min_freq = (rp_state_cap >> 16) & 0xff;
}
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index e1fa1f32f29e..70a2af5f518d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -465,6 +465,29 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc 
*slpc, u32 *val)
return ret;
  }
  
+static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore)

I know this code was already there, but I do have some questions around this
and maybe we can simplify now that are touching this function.


+{
+   int ret = 0;
+
+   if (ignore) {
+   ret = slpc_set_param(slpc,
+SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+ignore);
+   if (!ret)
+   return slpc_set_param(slpc,
+ 
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ slpc->min_freq);

why do we need to touch this min request here?

true, not needed anymore.



+   } else {
+   ret = slpc_unset_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY);

do we really need the unset param?

for me using set_param(SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, freq < rpe_freq)
was enough...


Yup, removed this helper function as discussed.

Thanks,

Vinay.




+   if (!ret)
+   return slpc_unset_param(slpc,
+   
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ);
+   }
+
+   return ret;
+}
+
  /**
   * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
   * @slpc: pointer to intel_guc_slpc.
@@ -491,6 +514,14 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
  
  	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
  
+		/* Ignore efficient freq if lower min freq is requested */

+   ret = slpc_ignore_eff_freq(slpc, val < slpc->rp1_freq);
+   if (unlikely(ret)) {
+   i915_probe_error(i915, "Failed to toggle efficient freq 
(%pe)\n",
+ERR_PTR(ret));
+   return ret;
+   }
+
ret = slpc_set_param(slpc,
 SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
 

Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency

2022-08-15 Thread Belgaumkar, Vinay



On 8/15/2022 9:51 AM, Rodrigo Vivi wrote:

On Tue, Aug 09, 2022 at 05:03:06PM -0700, Vinay Belgaumkar wrote:

Host Turbo operates at efficient frequency when GT is not idle unless
the user or workload has forced it to a higher level. Replicate the same
behavior in SLPC by allowing the algorithm to use efficient frequency.
We had disabled it during boot due to concerns that it might break
kernel ABI for min frequency. However, this is not the case, since
SLPC will still abide by the (min,max) range limits and pcode forces
frequency to 0 anyways when GT is in C6.

We also see much better perf numbers with benchmarks like glmark2 with
efficient frequency usage enabled.

Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency limits")

Signed-off-by: Vinay Belgaumkar 

I'm honestly surprised that our CI passed cleanly. What happens when user
request both min and max < rpe?

I'm sure that in this case GuC SLPC will put us to rpe ignoring our requests.
Or is this good enough for the users expectation because of the soft limits
showing the requested freq and we not asking to guc what it currently has as
minimal?

I just want to be sure that we are not causing any confusion for end users
out there in the case they request some min/max below RPe and start seeing
mismatches on the expectation because GuC is forcing the real min request
to RPe.

My suggestion is to ignore the RPe whenever we have a min request below it.
So GuC respects our (and users) chosen min. And restore whenever min request
is abobe rpe.


Yup, I have already sent a patch yesterday with that change, doesn't 
look like CI has run on it yet. This was the old version.


Thanks,

Vinay.



Thanks,
Rodrigo.


---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 52 -
  1 file changed, 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index e1fa1f32f29e..4b824da3048a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -137,17 +137,6 @@ static int guc_action_slpc_set_param(struct intel_guc 
*guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
  }
  
-static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)

-{
-   u32 request[] = {
-   GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
-   SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
-   id,
-   };
-
-   return intel_guc_send(guc, request, ARRAY_SIZE(request));
-}
-
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -201,16 +190,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 
id, u32 value)
return ret;
  }
  
-static int slpc_unset_param(struct intel_guc_slpc *slpc,

-   u8 id)
-{
-   struct intel_guc *guc = slpc_to_guc(slpc);
-
-   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
-
-   return guc_action_slpc_unset_param(guc, id);
-}
-
  static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
  {
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -597,29 +576,6 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
  }
  
-static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore)

-{
-   int ret = 0;
-
-   if (ignore) {
-   ret = slpc_set_param(slpc,
-SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
-ignore);
-   if (!ret)
-   return slpc_set_param(slpc,
- 
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
- slpc->min_freq);
-   } else {
-   ret = slpc_unset_param(slpc,
-  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY);
-   if (!ret)
-   return slpc_unset_param(slpc,
-   
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ);
-   }
-
-   return ret;
-}
-
  static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
  {
/* Force SLPC to used platform rp0 */
@@ -679,14 +635,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  	slpc_get_rp_values(slpc);
  
-	/* Ignore efficient freq and set min to platform min */

-   ret = slpc_ignore_eff_freq(slpc, true);
-   if (unlikely(ret)) {
-   i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n",
-ERR_PTR(ret));
-   return ret;
-   }
-
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
--
2.35.1



Re: [PATCH] drm/i915/guc/slpc: Allow SLPC to use efficient frequency

2022-08-14 Thread Belgaumkar, Vinay



On 8/14/2022 4:46 PM, Vinay Belgaumkar wrote:

Host Turbo operates at efficient frequency when GT is not idle unless
the user or workload has forced it to a higher level. Replicate the same
behavior in SLPC by allowing the algorithm to use efficient frequency.
We had disabled it during boot due to concerns that it might break
kernel ABI for min frequency. However, this is not the case since
SLPC will still abide by the (min,max) range limits.

With this change, min freq will be at efficient frequency level at init
instead of fused min (RPn). If user chooses to reduce min freq below the
efficient freq, we will turn off usage of efficient frequency and honor
the user request. When a higher value is written, it will get toggled
back again.

The patch also corrects the register which needs to be read for obtaining
the correct efficient frequency for Gen9+.

We see much better perf numbers with benchmarks like glmark2 with
efficient frequency usage enabled as expected.

BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/5468

Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c |  3 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 66 +++--
  drivers/gpu/drm/i915/intel_mchbar_regs.h|  3 +
  3 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index c7d381ad90cf..281a086fc265 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1108,6 +1108,9 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct 
intel_rps_freq_caps *c
} else {
caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
caps->rp1_freq = (rp_state_cap >>  8) & 0xff;


Forgot to remove old code here. Will do so for the next revision as it 
does not affect the patch.


Thanks,

Vinay.


+   caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
+  
intel_uncore_read(to_gt(i915)->uncore,
+  GEN10_FREQ_INFO_REC));
caps->min_freq = (rp_state_cap >> 16) & 0xff;
}
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index e1fa1f32f29e..70a2af5f518d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -465,6 +465,29 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc 
*slpc, u32 *val)
return ret;
  }
  
+static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore)

+{
+   int ret = 0;
+
+   if (ignore) {
+   ret = slpc_set_param(slpc,
+SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+ignore);
+   if (!ret)
+   return slpc_set_param(slpc,
+ 
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ slpc->min_freq);
+   } else {
+   ret = slpc_unset_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY);
+   if (!ret)
+   return slpc_unset_param(slpc,
+   
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ);
+   }
+
+   return ret;
+}
+
  /**
   * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
   * @slpc: pointer to intel_guc_slpc.
@@ -491,6 +514,14 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
  
  	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
  
+		/* Ignore efficient freq if lower min freq is requested */

+   ret = slpc_ignore_eff_freq(slpc, val < slpc->rp1_freq);
+   if (unlikely(ret)) {
+   i915_probe_error(i915, "Failed to toggle efficient freq 
(%pe)\n",
+ERR_PTR(ret));
+   return ret;
+   }
+
ret = slpc_set_param(slpc,
 SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
 val);
@@ -587,7 +618,9 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return ret;
  
  	if (!slpc->min_freq_softlimit) {

-   slpc->min_freq_softlimit = slpc->min_freq;
+   ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
+   if (unlikely(ret))
+   return ret;
slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
} else if (slpc->min_freq_softlimit != slpc->min_freq) {
return intel_guc_slpc_set_min_freq(slpc,
@@ -597,29 +630,6 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
  }
  
-static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore)

-{
-   int 

Re: [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest

2022-06-27 Thread Belgaumkar, Vinay



On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote:

On Thu, 23 Jun 2022 16:33:20 -0700, Vinay Belgaumkar wrote:

+static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps 
*rps, u32 *max_act_freq)
+{
+   struct intel_gt *gt = rps_to_gt(rps);
+   u32 perf_limit_reasons;
+   int err = 0;

-   igt_spinner_end();
-   st_engine_heartbeat_enable(engine);
-   }
+   err = slpc_set_min_freq(slpc, slpc->rp0_freq);
+   if (err)
+   return err;

-   pr_info("Max actual frequency for %s was %d\n",
-   engine->name, max_act_freq);
+   *max_act_freq =  intel_rps_read_actual_frequency(rps);
+   if (!(*max_act_freq == slpc->rp0_freq)) {

nit but '*max_act_freq != slpc->rp0_freq'



+   /* Check if there was some throttling by pcode */
+   perf_limit_reasons = intel_uncore_read(gt->uncore, 
GT0_PERF_LIMIT_REASONS);

-   /* Actual frequency should rise above min */
-   if (max_act_freq == slpc_min_freq) {
-   pr_err("Actual freq did not rise above min\n");
+   /* If not, this is an error */
+   if (!(perf_limit_reasons && GT0_PERF_LIMIT_REASONS_MASK)) {

Still wrong, should be & not &&


+   pr_err("Pcode did not grant max freq\n");
err = -EINVAL;
-   }
+   } else {
+   pr_info("Pcode throttled frequency 0x%x\n", 
perf_limit_reasons);

Another question, why are we using pr_err/info here rather than
drm_err/info? pr_err/info is ok for mock selftests since there is no drm
device but that is not the case here, I think this is done in other
selftests too but maybe fix this as well if we are making so many changes
here? Anyway can do later too.


Yup, will send a separate patch to change them to drm_err/info.

Thanks,

Vinay.



So let's settle issues in v2 thread first.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest

2022-06-27 Thread Belgaumkar, Vinay



On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote:

On Thu, 23 Jun 2022 16:21:46 -0700, Belgaumkar, Vinay wrote:

On 6/22/2022 1:32 PM, Dixit, Ashutosh wrote:

On Fri, 10 Jun 2022 16:47:12 -0700, Vinay Belgaumkar wrote:

This test will validate we can achieve actual frequency of RP0. Pcode
grants frequencies based on what GuC is requesting. However, thermal
throttling can limit what is being granted. Add a test to request for
max, but don't fail the test if RP0 is not granted due to throttle
reasons.

Also optimize the selftest by using a common run_test function to avoid
code duplication.

The refactoring does change the order of operations (changing the freq vs
spawning the spinner) but should be fine I think.

Yes, we now start the spinner outside the for loop, so that freq changes
occur quickly. This ensures we don't mess with SLPC algorithm's history by
frequently restarting the WL in the for loop.

Rename the "clamp" tests to vary_max_freq and vary_min_freq.

Either is ok, but maybe "clamp" names were ok I think since they verify req
freq is clamped at min/max.

True, though clamp usually is associated with limiting, whereas we actually
increase the min.

v2: Fix compile warning

Fixes 8ee2c227822e ("drm/i915/guc/slpc: Add SLPC selftest")
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 
   1 file changed, 158 insertions(+), 165 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index b768cea5943d..099129aae9a5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -8,6 +8,11 @@
   #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1)
   #define FREQUENCY_REQ_UNIT   DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \
  GEN9_FREQ_SCALER)
+enum test_type {
+   VARY_MIN,
+   VARY_MAX,
+   MAX_GRANTED
+};

   static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
   {
@@ -36,147 +41,120 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, 
u32 freq)
return ret;
   }

-static int live_slpc_clamp_min(void *arg)
+static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
+ u32 *max_act_freq)

Please run checkpatch, indentation seems off.

I had run it. Not sure why this wasn't caught.

Need to use 'checkpatch --strict'.

ok.



   {
-   struct drm_i915_private *i915 = arg;
-   struct intel_gt *gt = to_gt(i915);
-   struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
-   struct intel_rps *rps = &gt->rps;
-   struct intel_engine_cs *engine;
-   enum intel_engine_id id;
-   struct igt_spinner spin;
+   u32 step, max_freq, req_freq;
+   u32 act_freq;
u32 slpc_min_freq, slpc_max_freq;
int err = 0;

-   if (!intel_uc_uses_guc_slpc(>uc))
-   return 0;
-
-   if (igt_spinner_init(&spin, gt))
-   return -ENOMEM;
+   slpc_min_freq = slpc->min_freq;
+   slpc_max_freq = slpc->rp0_freq;

nit but we don't really need such variables since we don't change their
values, we should just use slpc->min_freq, slpc->rp0_freq directly. I'd
change this in all places in this patch.

I will remove it from the sub-functions, but will need to keep the one in
the main run_test(). We should query SLPC's min and max and then restore
that at the end of the test. It is possible that SLPC's min is different
from platform min for certain skus.

Sorry, I am not following. The tests are varying freq between platform min
to platform max correct? And platform min can be different from slpc min?
So why don't the tests start at slpc min rather than platform min? Can't
this return error?
Will start the tests from platform min -> platform max, that way we 
remain consistent.


And shouldn't slpc->min set to the real slpc min rather than to the
platform min when slpc initializes (in intel_guc_slpc_enable() or
slpc_get_rp_values())? (I am assuming the issue is only for the min and not
the max but not sure).
Certain conditions may result in SLPC setting the min to a different 
value. We can worry about that in a different patch.


So I'd expect everywhere a consistent set of freq's be used, in run_test()
and the actual vary_min/max_freq tests and also in the main driver.

Agree.



-   if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) {
-   pr_err("Could not get SLPC max freq\n");
-   return -EIO;
-   }
-
-   if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) {
-   pr_err("Could not get SLPC min freq\n");
-   return -EIO;

Why do we need these two function calls? Can't we just use slpc->rp0_freq
and slpc->min_freq as we are doing in the vary_min/max_freq() functions
above?

Same as above.

Also, as mentioned below I think here we should just do:

  

Re: [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest

2022-06-27 Thread Belgaumkar, Vinay



On 6/24/2022 8:59 PM, Dixit, Ashutosh wrote:

On Thu, 23 Jun 2022 16:33:20 -0700, Vinay Belgaumkar wrote:

+static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps 
*rps, u32 *max_act_freq)
+{
+   struct intel_gt *gt = rps_to_gt(rps);
+   u32 perf_limit_reasons;
+   int err = 0;

-   igt_spinner_end();
-   st_engine_heartbeat_enable(engine);
-   }
+   err = slpc_set_min_freq(slpc, slpc->rp0_freq);
+   if (err)
+   return err;

-   pr_info("Max actual frequency for %s was %d\n",
-   engine->name, max_act_freq);
+   *max_act_freq =  intel_rps_read_actual_frequency(rps);
+   if (!(*max_act_freq == slpc->rp0_freq)) {

nit but '*max_act_freq != slpc->rp0_freq'

ok.




+   /* Check if there was some throttling by pcode */
+   perf_limit_reasons = intel_uncore_read(gt->uncore, 
GT0_PERF_LIMIT_REASONS);

-   /* Actual frequency should rise above min */
-   if (max_act_freq == slpc_min_freq) {
-   pr_err("Actual freq did not rise above min\n");
+   /* If not, this is an error */
+   if (!(perf_limit_reasons && GT0_PERF_LIMIT_REASONS_MASK)) {

Still wrong, should be & not &&

yup, third time's the charm.



+   pr_err("Pcode did not grant max freq\n");
err = -EINVAL;
-   }
+   } else {
+   pr_info("Pcode throttled frequency 0x%x\n", 
perf_limit_reasons);

Another question, why are we using pr_err/info here rather than
drm_err/info? pr_err/info is ok for mock selftests since there is no drm
device but that is not the case here, I think this is done in other
selftests too but maybe fix this as well if we are making so many changes
here? Anyway can do later too.

So let's settle issues in v2 thread first.


Thanks,

Vinay.



Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Add a new SLPC selftest

2022-06-23 Thread Belgaumkar, Vinay



On 6/22/2022 1:32 PM, Dixit, Ashutosh wrote:

On Fri, 10 Jun 2022 16:47:12 -0700, Vinay Belgaumkar wrote:

This test will validate we can achieve actual frequency of RP0. Pcode
grants frequencies based on what GuC is requesting. However, thermal
throttling can limit what is being granted. Add a test to request for
max, but don't fail the test if RP0 is not granted due to throttle
reasons.

Also optimize the selftest by using a common run_test function to avoid
code duplication.

The refactoring does change the order of operations (changing the freq vs
spawning the spinner) but should be fine I think.
Yes, we now start the spinner outside the for loop, so that freq changes 
occur quickly. This ensures we don't mess with SLPC algorithm's history 
by frequently restarting the WL in the for loop.



Rename the "clamp" tests to vary_max_freq and vary_min_freq.

Either is ok, but maybe "clamp" names were ok I think since they verify req
freq is clamped at min/max.
True, though clamp usually is associated with limiting, whereas we 
actually increase the min.



v2: Fix compile warning

Fixes 8ee2c227822e ("drm/i915/guc/slpc: Add SLPC selftest")
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 
  1 file changed, 158 insertions(+), 165 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index b768cea5943d..099129aae9a5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -8,6 +8,11 @@
  #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1)
  #define FREQUENCY_REQ_UNIT	DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \
  GEN9_FREQ_SCALER)
+enum test_type {
+   VARY_MIN,
+   VARY_MAX,
+   MAX_GRANTED
+};

  static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
  {
@@ -36,147 +41,120 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, 
u32 freq)
return ret;
  }

-static int live_slpc_clamp_min(void *arg)
+static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
+ u32 *max_act_freq)

Please run checkpatch, indentation seems off.

I had run it. Not sure why this wasn't caught.



  {
-   struct drm_i915_private *i915 = arg;
-   struct intel_gt *gt = to_gt(i915);
-   struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
-   struct intel_rps *rps = &gt->rps;
-   struct intel_engine_cs *engine;
-   enum intel_engine_id id;
-   struct igt_spinner spin;
+   u32 step, max_freq, req_freq;
+   u32 act_freq;
u32 slpc_min_freq, slpc_max_freq;
int err = 0;

-   if (!intel_uc_uses_guc_slpc(&gt->uc))
-   return 0;
-
-   if (igt_spinner_init(, gt))
-   return -ENOMEM;
+   slpc_min_freq = slpc->min_freq;
+   slpc_max_freq = slpc->rp0_freq;

nit but we don't really need such variables since we don't change their
values, we should just use slpc->min_freq, slpc->rp0_freq directly. I'd
change this in all places in this patch.


I will remove it from the sub-functions, but will need to keep the one 
in the main run_test(). We should query SLPC's min and max and then 
restore that at the end of the test. It is possible that SLPC's min is 
different from platform min for certain skus.





-   if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) {
-   pr_err("Could not get SLPC max freq\n");
-   return -EIO;
-   }
-
-   if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) {
-   pr_err("Could not get SLPC min freq\n");
-   return -EIO;

Why do we need these two function calls? Can't we just use slpc->rp0_freq
and slpc->min_freq as we are doing in the vary_min/max_freq() functions
above?

Same as above.


Also, as mentioned below I think here we should just do:

 slpc_set_max_freq(slpc, slpc->rp0_freq);
 slpc_set_min_freq(slpc, slpc->min_freq);

to restore freq to a known state before starting the test (just in case a
previous test changed the values).

Any test that changes the frequencies should restore them as well.



-   }
-
-   if (slpc_min_freq == slpc_max_freq) {
-   pr_err("Min/Max are fused to the same value\n");
-   return -EINVAL;

What if they are actually equal? I think basically the max/min freq test
loops will just not be entered (so effectively the tests will just
skip). The granted freq test will be fine. So I think we can just delete
this if statement?

(It is showing deleted above in the patch but is in the new code somewhere
too).
Actually, we should set it to min/rp0 if this is the case. That change 
will be in a separate patch. This is needed for certain cases.



-   }
-
-   intel_gt_pm_wait_for_idle(gt);
-   intel_gt_pm_get(gt);
-   for_each_engine(engine, gt, id) {
-   struct i915_request *rq;
-   u32 

Re: [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost

2022-06-22 Thread Belgaumkar, Vinay



On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote:

On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote:

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
this patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

Overall I am ok moving waitboost to use the non-blocking H2G. We can
consider increasing the timeout in wait_for_ct_request_update() to be a
separate issue for blocking cases and we can handle that separately.

Still there a couple of issues with this patch mentioned below.


v2: Use drm_notice to report any errors that might occur while
sending the waitboost H2G request (Tvrtko)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +
  1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..e5e869c96262 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
return data->header.global_state;
  }

+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 
value)
+{
+   u32 request[] = {
+   GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+   id,
+   value,
+   };
+   int ret;
+
+   ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+   return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
  static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
  {
u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc 
*slpc, u32 freq)
 */

with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-   ret = slpc_set_param(slpc,
-SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-freq);
-   if (ret)
-   i915_probe_error(i915, "Unable to force min freq to %u: 
%d",
-freq, ret);
+   /* Non-blocking request will avoid stalls */
+   ret = slpc_set_param_nb(slpc,
+   
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+   freq);
}

return ret;
@@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, 
u32 freq)
  static void slpc_boost_work(struct work_struct *work)
  {
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   int err;

/*
 * Raise min freq to boost. It's possible that
@@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
 */
	mutex_lock(&slpc->lock);
	if (atomic_read(&slpc->num_waiters)) {
-   slpc_force_min_freq(slpc, slpc->boost_freq);
-   slpc->num_boosts++;
+   err = slpc_force_min_freq(slpc, slpc->boost_freq);
+   if (!err)
+   slpc->num_boosts++;
+   else
+   drm_notice(>drm, "Failed to send waitboost request 
(%d)\n",
+  err);

The issue I have is what happens when we de-boost (restore min freq to its
previous value in intel_guc_slpc_dec_waiters()). It would seem that that
call is fairly important to get the min freq down when there are no pending
requests. Therefore what do we do in that case?

This is the function:

void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
{
 mutex_lock(&slpc->lock);
 if (atomic_dec_and_test(&slpc->num_waiters))
 slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
 mutex_unlock(>lock);
}


1. First it would seem that at the minimum we need a similar drm_notice()
in intel_guc_slpc_dec_waiters(). That would mean we need to put the
drm_notice() back in slpc_force_min_freq() (replacing
i915_probe_error()) rather than in slpc_boost_work() above?

Sure.


2. Further, if de-boosting is important then maybe as was being discussed
in v1 of this patch (see the bottom of
https://patchwork.freedesktop.org/patch/485004/?series=103598=1) do
we need to use intel_guc_send_busy_loop() in the
intel_guc_slpc_dec_waiters() code path?


Using a busy_loop here would 

Re: [PATCH] drm/i915: Add global forcewake status to drpc

2022-06-17 Thread Belgaumkar, Vinay



On 6/17/2022 1:53 PM, Dixit, Ashutosh wrote:

On Fri, 17 Jun 2022 13:25:34 -0700, Vinay Belgaumkar wrote:

We have seen multiple RC6 issues where it is useful to know
which global forcewake bits are set. Add this to the 'drpc'
debugfs output.

A couple of optional nits below to look at but otherwise this is:

Reviewed-by: Ashutosh Dixit 


+static u32 mt_fwake_status(struct intel_uncore *uncore)
+{
+   return intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+}
+
  static int vlv_drpc(struct seq_file *m)
  {
struct intel_gt *gt = m->private;
struct intel_uncore *uncore = gt->uncore;
-   u32 rcctl1, pw_status;
+   u32 rcctl1, pw_status, mt_fwake;

+   mt_fwake = mt_fwake_status(uncore);

I would get rid of the function and just duplicate the intel_uncore_read_fw().
Made it a function in case we can find the equivalent register for ILK. 
Though, I am not sure if ILK even had the concept of MT fwake.



pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);

seq_printf(m, "RC6 Enabled: %s\n",
   str_yes_no(rcctl1 & (GEN7_RC_CTL_TO_MODE |
GEN6_RC_CTL_EI_MODE(1;
+   seq_printf(m, "Multi-threaded Forcewake: 0x%x\n", mt_fwake);

Is "Multi-threaded Forcewake Request" (the Bspec register name) a more
descriptive print?

Same for gen6_drpc() below. Thanks!


Sure.

Thanks,

Vinay.



Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost

2022-05-14 Thread Belgaumkar, Vinay



On 5/6/2022 9:43 AM, John Harrison wrote:

On 5/6/2022 00:18, Tvrtko Ursulin wrote:

On 05/05/2022 19:36, John Harrison wrote:

On 5/5/2022 10:21, Belgaumkar, Vinay wrote:

On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:

On 05/05/2022 06:40, Vinay Belgaumkar wrote:

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.


Is it the "Unable to force min freq" error? Do you have a link to 
the GitLab issue to add to commit message?
We don't have a specific error for this one, but have seen similar 
issues with other H2G which are blocking.



This is seen when waitboosting happens during a stress test.
this patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.


AFAIU with this approach, when CT channel is congested, you 
instead achieve silent dropping of the waitboost request, right?

We are hoping it makes it, but just not waiting for it to complete.
We are not 'hoping it makes it'. We know for a fact that it will 
make it. We just don't know when. The issue is not about whether the 
waitboost request itself gets dropped/lost it is about the ack that 
comes back. The GuC will process the message and it will send an 
ack. It's just a question of whether the i915 driver has given up 
waiting for it yet. And if it has, then you get the initial 'timed 
out waiting for ack' followed by a later 'got unexpected ack' message.


Whereas, if we make the call asynchronous, there is no ack. i915 
doesn't bother waiting and it won't get surprised later.


Also, note that this is only an issue when GuC itself is backed up. 
Normally that requires the creation/destruction of large numbers of 
contexts in rapid succession (context management is about the 
slowest thing we do with GuC). Some of the IGTs and selftests do 
that with thousands of contexts all at once. Those are generally 
where we see this kind of problem. It would be highly unlikely (but 
not impossible) to hit it in real world usage.


Goto ->

The general design philosophy of H2G messages is that asynchronous 
mode should be used for everything if at all possible. It is fire 
and forget and will all get processed in the order sent (same as 
batch buffer execution, really). Synchronous messages should only be 
used when an ack/status is absolutely required. E.g. start of day 
initialisation or things like TLB invalidation where we need to know 
that a cache has been cleared/flushed before updating memory from 
the CPU.


John.




It sounds like a potentially important feedback from the field to 
lose so easily. How about you added drm_notice to the worker when 
it fails?


Or simply a "one line patch" to replace i915_probe_error (!?) with 
drm_notice and keep the blocking behavior. (I have no idea what is 
the typical time to drain the CT buffer, and so to decide whether 
waiting or dropping makes more sense for effectiveness of 
waitboosting.)


Or since the congestion /should not/ happen in production, then 
the argument is why complicate with more code, in which case going 
with one line patch is an easy way forward?


Here. Where I did hint I understood the "should not happen in 
production angle".


So statement is GuC is congested in processing requests, but the h2g 
buffer is not congested so no chance intel_guc_send_nb() will fail 
with no space in that buffer? Sounds a bit un-intuitive.
That's two different things. The problem of no space in the H2G buffer 
is the same whether the call is sent blocking or non-blocking. The 
wait-for-space version is intel_guc_send_busy_loop() rather than 
intel_guc_send_nb(). NB: _busy_loop is a wrapper around _nb, so the 
wait-for-space version is also non-blocking ;). If a non-looping 
version is used (blocking or otherwise) it will return -EBUSY if there 
is no space. So both the original SLPC call and this non-blocking 
version will still get an immediate EBUSY return code if the H2G 
channel is backed up completely.


Whether the code should be handling EBUSY or not is another matter. 
Vinay, does anything higher up do a loop on EBUSY? If not, maybe it 
should be using the _busy_loop() call instead?


The blocking vs non-blocking is about waiting for a response if the 
command is successfully sent. The blocking case will sit and spin for 
a reply, the non-blocking assumes success and expects an asynchronous 
error report on failure. The assumption being that the call can't fail 
unless something is already broken - i915 sending invalid data to GuC 
for example. And thus any failure is in the BUG_ON category rather 
than the try again with a different approach and/or try again later 
category.


This is the point of the change. We are currently getting timeout 
errors when the H2G channel has space so the command can be sent, but 
the channel already contains a lo

Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost

2022-05-06 Thread Belgaumkar, Vinay



On 5/6/2022 12:18 AM, Tvrtko Ursulin wrote:


On 05/05/2022 19:36, John Harrison wrote:

On 5/5/2022 10:21, Belgaumkar, Vinay wrote:

On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:

On 05/05/2022 06:40, Vinay Belgaumkar wrote:

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.


Is it the "Unable to force min freq" error? Do you have a link to 
the GitLab issue to add to commit message?
We don't have a specific error for this one, but have seen similar 
issues with other H2G which are blocking.



This is seen when waitboosting happens during a stress test.
this patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.


AFAIU with this approach, when CT channel is congested, you instead 
achieve silent dropping of the waitboost request, right?

We are hoping it makes it, but just not waiting for it to complete.
We are not 'hoping it makes it'. We know for a fact that it will make 
it. We just don't know when. The issue is not about whether the 
waitboost request itself gets dropped/lost it is about the ack that 
comes back. The GuC will process the message and it will send an ack. 
It's just a question of whether the i915 driver has given up waiting 
for it yet. And if it has, then you get the initial 'timed out 
waiting for ack' followed by a later 'got unexpected ack' message.


Whereas, if we make the call asynchronous, there is no ack. i915 
doesn't bother waiting and it won't get surprised later.


Also, note that this is only an issue when GuC itself is backed up. 
Normally that requires the creation/destruction of large numbers of 
contexts in rapid succession (context management is about the slowest 
thing we do with GuC). Some of the IGTs and selftests do that with 
thousands of contexts all at once. Those are generally where we see 
this kind of problem. It would be highly unlikely (but not 
impossible) to hit it in real world usage.


Goto ->

The general design philosophy of H2G messages is that asynchronous 
mode should be used for everything if at all possible. It is fire and 
forget and will all get processed in the order sent (same as batch 
buffer execution, really). Synchronous messages should only be used 
when an ack/status is absolutely required. E.g. start of day 
initialisation or things like TLB invalidation where we need to know 
that a cache has been cleared/flushed before updating memory from the 
CPU.


John.




It sounds like a potentially important feedback from the field to 
lose so easily. How about you added drm_notice to the worker when 
it fails?


Or simply a "one line patch" to replace i915_probe_error (!?) with 
drm_notice and keep the blocking behavior. (I have no idea what is 
the typical time to drain the CT buffer, and so to decide whether 
waiting or dropping makes more sense for effectiveness of 
waitboosting.)


Or since the congestion /should not/ happen in production, then the 
argument is why complicate with more code, in which case going with 
one line patch is an easy way forward?


Here. Where I did hint I understood the "should not happen in 
production angle".


So statement is GuC is congested in processing requests, but the h2g 
buffer is not congested so no chance intel_guc_send_nb() will fail 
with no space in that buffer? Sounds a bit un-intuitive.


Anyway, it sounds okay to me to use the non-blocking, but I would like 
to see some logging if the unexpected does happen. Hence I was 
suggesting the option of adding drm_notice logging if the send fails 
from the worker. (Because I think other callers would already 
propagate the error, like sysfs.)


  err = slpc_force_min_freq(slpc, slpc->boost_freq);
  if (!err)
   slpc->num_boosts++;
  else
   drm_notice(... "Failed to send waitboost request (%d)", err);


Ok, makes sense. Will send out another rev with this change.

Thanks,

Vinay.




Something like that.

Regards,

Tvrtko


Even if we soften the blow here, the actual timeout error occurs in 
the intel_guc_ct.c code, so we cannot hide that error anyways. 
Making this call non-blocking will achieve both things.


Thanks,

Vinay.



Regards,

Tvrtko


Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 
-

  1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index 1db833da42df..c852f73cf521 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
*slpc)

  return data->header.global_state;
  }
  +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, 
u8 id, u32 value)

+{
+    u32 reques

Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use non-blocking H2G for waitboost

2022-05-05 Thread Belgaumkar, Vinay



On 5/5/2022 5:13 AM, Tvrtko Ursulin wrote:


On 05/05/2022 06:40, Vinay Belgaumkar wrote:

SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when GuC channel is backed up and it is unable to respond
in a timely fashion causing warnings and affecting CI.


Is it the "Unable to force min freq" error? Do you have a link to the 
GitLab issue to add to commit message?
We don't have a specific error for this one, but have seen similar 
issues with other H2G which are blocking.



This is seen when waitboosting happens during a stress test.
this patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.


AFAIU with this approach, when CT channel is congested, you instead 
achieve silent dropping of the waitboost request, right?

We are hoping it makes it, but just not waiting for it to complete.


It sounds like a potentially important feedback from the field to lose 
so easily. How about you added drm_notice to the worker when it fails?


Or simply a "one line patch" to replace i915_probe_error (!?) with 
drm_notice and keep the blocking behavior. (I have no idea what is the 
typical time to drain the CT buffer, and so to decide whether waiting 
or dropping makes more sense for effectiveness of waitboosting.)


Or since the congestion /should not/ happen in production, then the 
argument is why complicate with more code, in which case going with 
one line patch is an easy way forward?


Even if we soften the blow here, the actual timeout error occurs in the 
intel_guc_ct.c code, so we cannot hide that error anyways. Making this 
call non-blocking will achieve both things.


Thanks,

Vinay.



Regards,

Tvrtko


Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 -
  1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index 1db833da42df..c852f73cf521 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc 
*slpc)

  return data->header.global_state;
  }
  +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 
id, u32 value)

+{
+    u32 request[] = {
+    GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+    SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+    id,
+    value,
+    };
+    int ret;
+
+    ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+    return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 
value)

+{
+    struct intel_guc *guc = slpc_to_guc(slpc);
+
+    GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+    return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
  static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, 
u32 value)

  {
  u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct 
intel_guc_slpc *slpc, u32 freq)

   */
    with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-    ret = slpc_set_param(slpc,
- SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
- freq);
-    if (ret)
-    i915_probe_error(i915, "Unable to force min freq to %u: 
%d",

- freq, ret);
+    /* Non-blocking request will avoid stalls */
+    ret = slpc_set_param_nb(slpc,
+    SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+    freq);
  }
    return ret;
@@ -231,8 +253,8 @@ static void slpc_boost_work(struct work_struct 
*work)

   */
  mutex_lock(&slpc->lock);
  if (atomic_read(&slpc->num_waiters)) {
-    slpc_force_min_freq(slpc, slpc->boost_freq);
-    slpc->num_boosts++;
+    if (!slpc_force_min_freq(slpc, slpc->boost_freq))
+    slpc->num_boosts++;
  }
  mutex_unlock(>lock);
  }


Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Use i915_probe_error instead of drm_err

2022-04-14 Thread Belgaumkar, Vinay



On 4/13/2022 11:41 PM, Anshuman Gupta wrote:

On 2022-04-13 at 04:18:52 +0530, Vinay Belgaumkar wrote:

This will ensure we don't have false positives when we run
error injection tests.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 42 ++---
  1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b170238aa15c..639de3c10545 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -152,8 +152,8 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
  
  	ret = guc_action_slpc_query(guc, offset);

if (unlikely(ret))

As commit logs describe, this code patch can hit, when we run error injection 
test.
Do we need unlikely() here?
Br,
Anshuman Gupta.


I think we still need the unlikely(). Majority of the time, we still 
need the compiler optimization.


Only in the rare case of running the error injection test will it not be 
needed.


Thanks,

Vinay.


-   drm_err(>drm, "Failed to query task state (%pe)\n",
-   ERR_PTR(ret));
+   i915_probe_error(i915, "Failed to query task state (%pe)\n",
+ERR_PTR(ret));
  
  	drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
  
@@ -170,8 +170,8 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
  
  	ret = guc_action_slpc_set_param(guc, id, value);

if (ret)
-   drm_err(>drm, "Failed to set param %d to %u (%pe)\n",
-   id, value, ERR_PTR(ret));
+   i915_probe_error(i915, "Failed to set param %d to %u (%pe)\n",
+id, value, ERR_PTR(ret));
  
  	return ret;

  }
@@ -211,8 +211,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, 
u32 freq)
 SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
 freq);
if (ret)
-   drm_err(>drm, "Unable to force min freq to %u: 
%d",
-   freq, ret);
+   i915_probe_error(i915, "Unable to force min freq to %u: 
%d",
+freq, ret);
}
  
  	return ret;

@@ -247,9 +247,9 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
  
  	err = intel_guc_allocate_and_map_vma(guc, size, >vma, (void **)>vaddr);

if (unlikely(err)) {
-   drm_err(>drm,
-   "Failed to allocate SLPC struct (err=%pe)\n",
-   ERR_PTR(err));
+   i915_probe_error(i915,
+"Failed to allocate SLPC struct (err=%pe)\n",
+ERR_PTR(err));
return err;
}
  
@@ -316,15 +316,15 @@ static int slpc_reset(struct intel_guc_slpc *slpc)

ret = guc_action_slpc_reset(guc, offset);
  
  	if (unlikely(ret < 0)) {

-   drm_err(>drm, "SLPC reset action failed (%pe)\n",
-   ERR_PTR(ret));
+   i915_probe_error(i915, "SLPC reset action failed (%pe)\n",
+ERR_PTR(ret));
return ret;
}
  
  	if (!ret) {

if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) {
-   drm_err(>drm, "SLPC not enabled! State = %s\n",
-   slpc_get_state_string(slpc));
+   i915_probe_error(i915, "SLPC not enabled! State = %s\n",
+slpc_get_state_string(slpc));
return -EIO;
}
}
@@ -616,8 +616,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  	ret = slpc_reset(slpc);

if (unlikely(ret < 0)) {
-   drm_err(>drm, "SLPC Reset event returned (%pe)\n",
-   ERR_PTR(ret));
+   i915_probe_error(i915, "SLPC Reset event returned (%pe)\n",
+ERR_PTR(ret));
return ret;
}
  
@@ -632,24 +632,24 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)

/* Ignore efficient freq and set min to platform min */
ret = slpc_ignore_eff_freq(slpc, true);
if (unlikely(ret)) {
-   drm_err(>drm, "Failed to set SLPC min to RPn (%pe)\n",
-   ERR_PTR(ret));
+   i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n",
+ERR_PTR(ret));
return ret;
}
  
  	/* Set SLPC max limit to RP0 */

ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
-   drm_err(>drm, "Failed to set SLPC max to RP0 (%pe)\n",
-   ERR_PTR(ret));
+   i915_probe_error(i915, "Failed to set SLPC max to RP0 (%pe)\n",
+

Re: [Intel-gfx] [PATCH] drm/i915/guc/slpc: Correct the param count for unset param

2022-02-17 Thread Belgaumkar, Vinay



On 2/17/2022 1:41 AM, Tvrtko Ursulin wrote:


On 16/02/2022 18:15, Vinay Belgaumkar wrote:

SLPC unset param H2G only needs one parameter - the id of the
param.

Fixes: 025cb07bebfa ("drm/i915/guc/slpc: Cache platform frequency 
limits")


How serious is this? Does it need backporting? If so:

Cc:  # v5.15+

?


This path (unset_param) is not being exercised currently, so not very 
serious.


Thanks,

Vinay.



Regards,

Tvrtko


Suggested-by: Umesh Nerlige Ramappa 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index 13b27b8ff74e..ba21ace973da 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -110,7 +110,7 @@ static int guc_action_slpc_unset_param(struct 
intel_guc *guc, u8 id)

  {
  u32 request[] = {
  GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
-    SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+    SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
  id,
  };


Re: [PATCH] drm/i915/guc: Update guc shim control programming on newer platforms

2022-01-25 Thread Belgaumkar, Vinay



On 1/20/2022 2:24 PM, Daniele Ceraolo Spurio wrote:

Starting from xehpsdv, bit 0 of of the GuC shim control register has
been repurposed, while bit 2 is now reserved, so we need to avoid
setting those for their old meaning on newer platforms.

Cc: Vinay Belgaumkar 
Cc: Stuart Summers 
Signed-off-by: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index f773e7f35bc1a..40f7d4779c9ec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -15,13 +15,15 @@
  
  static void guc_prepare_xfer(struct intel_uncore *uncore)

  {
-   u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
-GUC_ENABLE_READ_CACHE_LOGIC |
-GUC_ENABLE_MIA_CACHING |
+   u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
 GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
 GUC_ENABLE_MIA_CLOCK_GATING;
  
+	if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50))

+   shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+ GUC_ENABLE_MIA_CACHING;
+


LGTM.

Reviewed-by: Vinay Belgaumkar 


/* Must program this register before loading the ucode with DMA */
intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
  


Re: [PATCH 3/3] drm/i915/gt: Improve "race-to-idle" at low frequencies

2021-11-23 Thread Belgaumkar, Vinay




On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote:

From: Chris Wilson 

While the power consumption is proportional to the frequency, there is
also a static draw for active gates. The longer we are able to powergate
(rc6), the lower the static draw. Thus there is a sweetspot in the
frequency/power curve where we run at higher frequency in order to sleep
longer, aka race-to-idle. This is more evident at lower frequencies, so
let's look to bump the frequency if we think we will benefit by sleeping
longer at the higher frequency and so conserving power.

Signed-off-by: Chris Wilson 
Cc: Vinay Belgaumkar 
Cc: Tvrtko Ursulin 


Data collected does show some power savings.

Reviewed-by: Vinay Belgaumkar 

---
  drivers/gpu/drm/i915/gt/intel_rps.c | 31 -
  1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 3675ac93ded0..6af3231982af 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -63,6 +63,22 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, 
u32 val)
intel_uncore_write_fw(uncore, reg, val);
  }
  
+static bool race_to_idle(struct intel_rps *rps, u64 busy, u64 dt)

+{
+   unsigned int this = rps->cur_freq;
+   unsigned int next = rps->cur_freq + 1;
+   u64 next_dt = next * max(busy, dt);
+
+   /*
+* Compare estimated time spent in rc6 at the next power bin. If
+* we expect to sleep longer than the estimated increased power
+* cost of running at a higher frequency, it will be reduced power
+* consumption overall.
+*/
+   return (((next_dt - this * busy) >> 10) * this * this >
+   ((next_dt - next * busy) >> 10) * next * next);
+}
+
  static void rps_timer(struct timer_list *t)
  {
struct intel_rps *rps = from_timer(rps, t, timer);
@@ -133,7 +149,7 @@ static void rps_timer(struct timer_list *t)
if (!max_busy[i])
break;
  
-			busy += div_u64(max_busy[i], 1 << i);

+   busy += max_busy[i] >> i;
}
GT_TRACE(rps_to_gt(rps),
 "busy:%lld [%d%%], max:[%lld, %lld, %lld], 
interval:%d\n",
@@ -141,13 +157,18 @@ static void rps_timer(struct timer_list *t)
 max_busy[0], max_busy[1], max_busy[2],
 rps->pm_interval);
  
-		if (100 * busy > rps->power.up_threshold * dt &&

-   rps->cur_freq < rps->max_freq_softlimit) {
+   if (rps->cur_freq < rps->max_freq_softlimit &&
+   race_to_idle(rps, max_busy[0], dt)) {
+   rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+   rps->pm_interval = 1;
+   schedule_work(&rps->work);
+   } else if (rps->cur_freq < rps->max_freq_softlimit &&
+  100 * busy > rps->power.up_threshold * dt) {
rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
rps->pm_interval = 1;
schedule_work(&rps->work);
-   } else if (100 * busy < rps->power.down_threshold * dt &&
-  rps->cur_freq > rps->min_freq_softlimit) {
+   } else if (rps->cur_freq > rps->min_freq_softlimit &&
+  100 * busy < rps->power.down_threshold * dt) {
rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
rps->pm_interval = 1;
+   schedule_work(&rps->work);



Re: [PATCH 2/3] drm/i915/gt: Compare average group occupancy for RPS evaluation

2021-11-23 Thread Belgaumkar, Vinay




On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote:

From: Chris Wilson 

Currently, we inspect each engine individually and measure the occupancy
of that engine over the last evaluation interval. If that exceeds our
busyness thresholds, we decide to increase the GPU frequency. However,
under a load balancer, we should consider the occupancy of entire engine
groups, as work may be spread out across the group. In doing so, we
prefer wide over fast, power consumption is approximately proportional to
the square of the frequency. However, since the load balancer is greedy,
the first idle engine gets all the work, and preferentially reuses the
last active engine, under light loads all work is assigned to one
engine, and so that engine appears very busy. But if the work happened
to overlap slightly, the workload would spread across multiple engines,
reducing each individual engine's runtime, and so reducing the rps
contribution, keeping the frequency low. Instead, when considering the
contribution, consider the contribution over the entire engine group
(capacity).

Signed-off-by: Chris Wilson 
Cc: Vinay Belgaumkar 
Cc: Tvrtko Ursulin 


Reviewed-by: Vinay Belgaumkar 


---
  drivers/gpu/drm/i915/gt/intel_rps.c | 48 -
  1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 07ff7ba7b2b7..3675ac93ded0 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -7,6 +7,7 @@
  
  #include "i915_drv.h"

  #include "intel_breadcrumbs.h"
+#include "intel_engine_pm.h"
  #include "intel_gt.h"
  #include "intel_gt_clock_utils.h"
  #include "intel_gt_irq.h"
@@ -65,26 +66,45 @@ static void set(struct intel_uncore *uncore, i915_reg_t 
reg, u32 val)
  static void rps_timer(struct timer_list *t)
  {
struct intel_rps *rps = from_timer(rps, t, timer);
-   struct intel_engine_cs *engine;
-   ktime_t dt, last, timestamp;
-   enum intel_engine_id id;
+   struct intel_gt *gt = rps_to_gt(rps);
+   ktime_t dt, last, timestamp = 0;
s64 max_busy[3] = {};
+   int i, j;
  
-	timestamp = 0;

-   for_each_engine(engine, rps_to_gt(rps), id) {
-   s64 busy;
-   int i;
+   /* Compare average occupancy over each engine group */
+   for (i = 0; i < ARRAY_SIZE(gt->engine_class); i++) {
+   s64 busy = 0;
+   int count = 0;
+
+   for (j = 0; j < ARRAY_SIZE(gt->engine_class[i]); j++) {
+   struct intel_engine_cs *engine;
  
-		dt = intel_engine_get_busy_time(engine, &timestamp);

-   last = engine->stats.rps;
-   engine->stats.rps = dt;
+   engine = gt->engine_class[i][j];
+   if (!engine)
+   continue;
  
-		busy = ktime_to_ns(ktime_sub(dt, last));

-   for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
-   if (busy > max_busy[i])
-   swap(busy, max_busy[i]);
+   dt = intel_engine_get_busy_time(engine, &timestamp);
+   last = engine->stats.rps;
+   engine->stats.rps = dt;
+
+   if (!intel_engine_pm_is_awake(engine))
+   continue;
+
+   busy += ktime_to_ns(ktime_sub(dt, last));
+   count++;
+   }
+
+   if (count > 1)
+   busy = div_u64(busy, count);
+   if (busy <= max_busy[ARRAY_SIZE(max_busy) - 1])
+   continue;
+
+   for (j = 0; j < ARRAY_SIZE(max_busy); j++) {
+   if (busy > max_busy[j])
+   swap(busy, max_busy[j]);
}
}
+
last = rps->pm_timestamp;
rps->pm_timestamp = timestamp;
  



Re: [PATCH 1/3] drm/i915/guc/slpc: Define and initialize boost frequency

2021-11-01 Thread Belgaumkar, Vinay




On 11/1/2021 1:26 PM, Dixit, Ashutosh wrote:

On Sun, 31 Oct 2021 21:39:35 -0700, Belgaumkar, Vinay wrote:


Define helpers and struct members required to record boost info.
Boost frequency is initialized to RP0 at SLPC init. Also define num_waiters
which can track the pending boost requests.

Boost will be done by scheduling a worker thread. This will allow
us to make H2G calls inside an interrupt context. Initialize the


"to not make H2G calls from interrupt context" is probably better.


+static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret = 0;
+
+   lockdep_assert_held(&slpc->lock);
+
+   /**


nit: this I believe should just be

/*


ok.



/** I believe shows up in kerneldoc so shouldn't be used unless we want
something in kerneldoc.


+* This function is a little different as compared to
+* intel_guc_slpc_set_min_freq(). Softlimit will not be updated
+* here since this is used to temporarily change min freq,
+* for example, during a waitboost. Caller is responsible for
+* checking bounds.
+*/
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+freq);
+   if (ret)
+   drm_err(&i915->drm, "Unable to force min freq to %u: 
%d",


Probably drm_err_ratelimited since it's called at run time not only at
init? Not sure if drm_err_once suffices, probably not.


Keeping it drm_err as discussed offline.




+   freq, ret);
+   }
+
+   return ret;
+}
+
+static void slpc_boost_work(struct work_struct *work)
+{
+   struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), 
boost_work);
+
+   /* Raise min freq to boost. It's possible that
+* this is greater than current max. But it will
+* certainly be limited by RP0. An error setting
+* the min param is not fatal.
+*/


nit: do we follow the following format for multi-line comments,
Documentation/process/coding-style.rst mentions this:

/*
  * Line 1
  * Line 2
  */


Ok.

Thanks,
Vinay.




Re: [PATCH 2/3] drm/i915/guc/slpc: Add waitboost functionality for SLPC

2021-11-01 Thread Belgaumkar, Vinay




On 11/1/2021 1:28 PM, Dixit, Ashutosh wrote:

On Sun, 31 Oct 2021 21:39:36 -0700, Belgaumkar, Vinay wrote:


@@ -945,6 +960,17 @@ void intel_rps_boost(struct i915_request *rq)
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, >fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;

+   if (rps_uses_slpc(rps)) {
+   slpc = rps_to_slpc(rps);
+
+   /* Return if old value is non zero */
+   if (atomic_fetch_inc(&slpc->num_waiters))
+   return;
+
+   if (intel_rps_get_requested_frequency(rps) < 
slpc->boost_freq)


I think this check is not needed because:

a. The waitboost code only changes min_freq. i915 code should not depend on
how GuC changes requested_freq in response to change in min_freq.

b. What is more worrisome is that when we "de-boost" we set min_freq to
min_freq_softlimit. If GuC e.g. has a delay in bringing requested_freq
down and intel_rps_boost() gets called meanwhile we will miss the one
opportunity we have to boost the freq (when num_waiters goes from 0 to
1. Asking GuC to boost when actual_freq is already boost_freq is
harmless in comparison). So to avoid this risk of missing the chance to
boost I think we should delete this check and replace the code above
with something like:

 if (rps_uses_slpc(rps)) {
 struct intel_guc_slpc *slpc = rps_to_slpc(rps);

 if (slpc->boost_freq <= slpc->min_freq_softlimit)
 return;

 if (!atomic_fetch_inc(>num_waiters))
 schedule_work(>boost_work);

 return;
 }

Note that this check:

 if (slpc->boost_freq <= slpc->min_freq_softlimit)
 return;

(which is basically a degenerate case in which we don't have to do
anything), can be probably be implemented when boost_freq is set in sysfs,
or may already be encompassed in "val < slpc->min_freq" in
intel_guc_slpc_set_boost_freq() in which case this check can also be
skipped from this function.


We already have that check in set_boost_freq function. So, just adding 
the atomic_fetch_inc check.





+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
+{
+   /* Return min back to the softlimit.
+* This is called during request retire,
+* so we don't need to fail that if the
+* set_param fails.
+*/


nit: maybe follow kernel multi-line comment format.


Ok.

Thanks,
Vinay.


Re: [PATCH v2 0/3] drm/i915/guc/slpc: Implement waitboost for SLPC

2021-11-01 Thread Belgaumkar, Vinay




On 11/1/2021 1:24 PM, Dixit, Ashutosh wrote:

On Sun, 31 Oct 2021 21:39:34 -0700, Belgaumkar, Vinay wrote:


Waitboost is a legacy feature implemented in the Host Turbo algorithm. This
patch set implements it for the SLPC path. A "boost" happens when user
calls gem_wait ioctl on a submission that has not landed on HW yet.


Afaiu user doesn't have to call gem_wait, the boost will happen whenever a
request waits to be submitted to GuC because of an unmet dependency. This
has to be done from i915 because GuC has not yet seen the request.

Rest of the cover letter is fine.


Ok, thanks,
Vinay.




Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls

2021-09-15 Thread Belgaumkar, Vinay




On 9/14/2021 12:51 PM, Lucas De Marchi wrote:

The clflush calls here aren't doing anything since we are not writing
something and flushing the cache lines to be visible to GuC. Here the
intention seems to be to make sure whatever GuC has written is visible
to the CPU before we read them. However a clflush from the CPU side is
the wrong instruction to use.

 From code inspection on the other clflush() calls in i915/gt/uc/ these
are the only ones with this behavior. The others are apparently making
sure what we write is visible to GuC.

Signed-off-by: Lucas De Marchi 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..2e996b77df80 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
  
  	GEM_BUG_ON(!slpc->vma);
  
-	drm_clflush_virt_range(slpc->vaddr, sizeof(u32));

data = slpc->vaddr;
  
  	return data->header.global_state;

@@ -172,8 +171,6 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
drm_err(>drm, "Failed to query task state (%pe)\n",
ERR_PTR(ret));
  
-	drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);

-


LGTM.
Reviewed-by: Vinay Belgaumkar 


return ret;
  }
  



Re: [Intel-gfx] [PATCH 13/14] drm/i915/guc/slpc: Add SLPC selftest

2021-07-29 Thread Belgaumkar, Vinay




On 7/29/2021 4:40 PM, Matthew Brost wrote:

On Wed, Jul 28, 2021 at 02:11:43PM -0700, Vinay Belgaumkar wrote:

Tests that exercise the SLPC get/set frequency interfaces.

Clamp_max will set max frequency to multiple levels and check
that SLPC requests frequency lower than or equal to it.

Clamp_min will set min frequency to different levels and check
if SLPC requests are higher or equal to those levels.

v2: Address review comments (Michal W)
v3: Checkpatch() corrections
v4: Remove unnecessary header file (Matthew Brost)

Signed-off-by: Vinay Belgaumkar 


Overall looks but need to address the checkpatch warnings, have question
/ nit below.


---
  drivers/gpu/drm/i915/gt/intel_rps.c   |   1 +
  drivers/gpu/drm/i915/gt/selftest_slpc.c   | 309 ++
  .../drm/i915/selftests/i915_live_selftests.h  |   1 +
  3 files changed, 311 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.c

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 49db8ed9f80d..7a2aa0031cf6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2330,4 +2330,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
  
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

  #include "selftest_rps.c"
+#include "selftest_slpc.c"
  #endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
new file mode 100644
index ..119d012d1e1e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#define NUM_STEPS 5
+#define H2G_DELAY 5
+#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1)
+
+static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+   int ret;
+
+   ret = intel_guc_slpc_set_min_freq(slpc, freq);
+   if (ret)
+   pr_err("Could not set min frequency to [%u]\n", freq);
+   else /* Delay to ensure h2g completes */
+   delay_for_h2g();
+
+   return ret;
+}
+
+static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+   int ret;
+
+   ret = intel_guc_slpc_set_max_freq(slpc, freq);
+   if (ret)
+   pr_err("Could not set maximum frequency [%u]\n",
+   freq);
+   else /* Delay to ensure h2g completes */
+   delay_for_h2g();
+
+   return ret;
+}
+
+int live_slpc_clamp_min(void *arg)
+{
+   struct drm_i915_private *i915 = arg;
+   struct intel_gt *gt = &i915->gt;
+   struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+   struct intel_rps *rps = &gt->rps;
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   struct igt_spinner spin;
+   u32 slpc_min_freq, slpc_max_freq;
+   int err = 0;
+
+   if (!intel_uc_uses_guc_slpc(>uc))
+   return 0;
+
+   if (igt_spinner_init(, gt))
+   return -ENOMEM;
+
+   if (intel_guc_slpc_get_max_freq(slpc, _max_freq)) {
+   pr_err("Could not get SLPC max freq\n");
+   return -EIO;
+   }
+
+   if (intel_guc_slpc_get_min_freq(slpc, _min_freq)) {
+   pr_err("Could not get SLPC min freq\n");
+   return -EIO;
+   }
+
+   if (slpc_min_freq == slpc_max_freq) {
+   pr_err("Min/Max are fused to the same value\n");
+   return -EINVAL;
+   }
+
+   intel_gt_pm_wait_for_idle(gt);
+   intel_gt_pm_get(gt);
+   for_each_engine(engine, gt, id) {
+   struct i915_request *rq;
+   u32 step, min_freq, req_freq;
+   u32 act_freq, max_act_freq;
+
+   if (!intel_engine_can_store_dword(engine))
+   continue;
+
+   /* Go from min to max in 5 steps */
+   step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS;
+   max_act_freq = slpc_min_freq;
+   for (min_freq = slpc_min_freq; min_freq < slpc_max_freq;
+   min_freq += step) {
+   err = slpc_set_min_freq(slpc, min_freq);
+   if (err)
+   break;
+
+   st_engine_heartbeat_disable(engine);
+
+   rq = igt_spinner_create_request(,
+   engine->kernel_context,
+   MI_NOOP);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   st_engine_heartbeat_enable(engine);
+   break;
+   }
+
+   i915_request_add(rq);
+
+   if (!igt_wait_for_spinner(, rq)) {
+   pr_err("%s: Spinner did not start\n",
+   engine->name);
+   igt_spinner_end();
+   

Re: [Intel-gfx] [PATCH 11/14] drm/i915/guc/slpc: Cache platform frequency limits

2021-07-29 Thread Belgaumkar, Vinay




On 7/29/2021 9:21 AM, Michal Wajdeczko wrote:



On 28.07.2021 23:11, Vinay Belgaumkar wrote:

Cache rp0, rp1 and rpn platform limits into SLPC structure
for range checking while setting min/max frequencies.

Also add "soft" limits which keep track of frequency changes
made from userland. These are initially set to platform min
and max.

v2: Address review comments (Michal W)
v3: Formatting (Michal W)
v4: Add separate function to parse rp values (Michal W)
v5: Perform range checking for set min/max (Michal W)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 115 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |   9 ++
  drivers/gpu/drm/i915/i915_reg.h   |   3 +
  3 files changed, 127 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 742918875593..bfd5fb0751fd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
return err;
}
  
+	slpc->max_freq_softlimit = 0;

+   slpc->min_freq_softlimit = 0;


shouldn't this be in intel_guc_slpc_init() ?


No, we want to maintain softlimits across suspend resume.




+
return err;
  }
  
@@ -124,6 +127,18 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

return ret > 0 ? -EPROTO : ret;
  }
  
+static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)

+{
+   u32 request[] = {
+   GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+   id,
+   };
+
+   return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -177,6 +192,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 
id, u32 value)
return ret;
  }
  
+static int slpc_unset_param(struct intel_guc_slpc *slpc,

+   u8 id)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_unset_param(guc, id);
+}
+
  static const char *slpc_global_state_to_string(enum slpc_global_state state)
  {
switch (state) {
@@ -307,6 +332,11 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc 
*slpc, u32 val)
intel_wakeref_t wakeref;
int ret;
  
+	if ((val < slpc->min_freq) ||

+   (val > slpc->rp0_freq) ||
+   (val < slpc->min_freq_softlimit))
+   return -EINVAL;
+
with_intel_runtime_pm(>runtime_pm, wakeref) {
ret = slpc_set_param(slpc,
   SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
@@ -317,6 +347,8 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc 
*slpc, u32 val)
ret = -EIO;
}
  
+	slpc->max_freq_softlimit = val;

+
return ret;
  }
  
@@ -363,6 +395,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)

intel_wakeref_t wakeref;
int ret;
  
+	if ((val < slpc->min_freq) ||

+   (val > slpc->rp0_freq) ||
+   (val > slpc->max_freq_softlimit))
+   return -EINVAL;
+
with_intel_runtime_pm(>runtime_pm, wakeref) {
ret = slpc_set_param(slpc,
   SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
@@ -373,6 +410,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
ret = -EIO;
}
  
+	slpc->min_freq_softlimit = val;

+
return ret;
  }
  
@@ -418,6 +457,71 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)

   GEN6_PMINTRMSK, pm_intrmsk_mbz, 0);
  }
  
+static int slpc_set_softlimits(struct intel_guc_slpc *slpc)

+{
+   int ret = 0;
+
+   /*
+* Softlimits are initially equivalent to platform limits
+* unless they have deviated from defaults, in which case,
+* we retain the values and set min/max accordingly.
+*/
+   if (!slpc->max_freq_softlimit)
+   slpc->max_freq_softlimit = slpc->rp0_freq;
+   else if (slpc->max_freq_softlimit != slpc->rp0_freq)
+   ret = intel_guc_slpc_set_max_freq(slpc,
+   slpc->max_freq_softlimit);


if this fails, shouldn't we reset max_freq_softlimit to platform limit ?
otherwise we could be with some potentially bad value forever


Well, if this call fails, it's likely the next set_max_freq call will 
also fail, so not much point. Also, it will likely (?) just retain the 
old value, which is fine.





+
+   if (!slpc->min_freq_softlimit)
+   slpc->min_freq_softlimit = slpc->min_freq;
+   else if (slpc->min_freq_softlimit != slpc->min_freq)
+

Re: [PATCH 13/15] drm/i915/guc/slpc: Sysfs hooks for SLPC

2021-07-28 Thread Belgaumkar, Vinay




On 7/27/2021 9:59 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Update the get/set min/max freq hooks to work for
SLPC case as well. Consolidate helpers for requested/min/max
frequency get/set to intel_rps where the proper action can
be taken depending on whether SLPC is enabled.

v2: Add wrappers for getting rp0/1/n frequencies, update
softlimits in set min/max SLPC functions. Also check for
boundary conditions before setting them.

v3: Address review comments (Michal W)

Acked-by: Michal Wajdeczko 
Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Tvrtko Ursulin 
Signed-off-by: Sujaritha Sundaresan 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 165 
  drivers/gpu/drm/i915/gt/intel_rps.h |  11 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c |  14 ++
  drivers/gpu/drm/i915/i915_pmu.c |   2 +-
  drivers/gpu/drm/i915/i915_reg.h |   2 +
  drivers/gpu/drm/i915/i915_sysfs.c   |  77 ++---
  6 files changed, 207 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index e858eeb2c59d..48d4147165a9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps 
*rps)
return rps_to_gt(rps)->uncore;
  }
  
+static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)

+{
+   struct intel_gt *gt = rps_to_gt(rps);
+
+   return >uc.guc.slpc;
+}
+
  static bool rps_uses_slpc(struct intel_rps *rps)
  {
struct intel_gt *gt = rps_to_gt(rps);
@@ -1960,6 +1967,164 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
  }
  
+u32 intel_rps_read_punit_req(struct intel_rps *rps)

+{
+   struct intel_uncore *uncore = rps_to_uncore(rps);
+
+   return intel_uncore_read(uncore, GEN6_RPNSWREQ);
+}
+
+u32 intel_rps_get_req(struct intel_rps *rps, u32 pureq)


hmm, "rps" looks to be not needed here
btw, shouldn't this function be static ?


sure.




+{
+   u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
+
+   return req;
+}
+
+u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
+{
+   u32 freq = intel_rps_get_req(rps, intel_rps_read_punit_req(rps));
+
+   return intel_gpu_freq(rps, freq);
+}
+
+u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
+{
+   if (rps_uses_slpc(rps))
+   return intel_rps_read_punit_req_frequency(rps);
+   else
+   return intel_gpu_freq(rps, rps->cur_freq);
+}
+
+u32 intel_rps_get_max_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->max_freq_softlimit;
+   else
+   return intel_gpu_freq(rps, rps->max_freq_softlimit);
+}
+
+u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->rp0_freq;
+   else
+   return intel_gpu_freq(rps, rps->rp0_freq);
+}
+
+u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->rp1_freq;
+   else
+   return intel_gpu_freq(rps, rps->rp1_freq);
+}
+
+u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->min_freq;
+   else
+   return intel_gpu_freq(rps, rps->min_freq);
+}
+
+int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+   int ret = 0;
+
+   if (rps_uses_slpc(rps))
+   return intel_guc_slpc_set_max_freq(slpc, val);


few above functions are implemented as nice dispatcher

if (rps_uses_slpc(rps))
return ... slpc stuff;
else
return ... gpu stuff;

can we have something similar here ?
likely just putting below code into helper will do the trick


ok.




+
+   mutex_lock(>lock);
+
+   val = intel_freq_opcode(rps, val);
+   if (val < rps->min_freq ||
+   val > rps->max_freq ||
+   val < rps->min_freq_softlimit) {
+   ret = -EINVAL;
+   goto unlock;
+   }
+
+   if (val > rps->rp0_freq)
+   drm_dbg(>drm, "User requested overclocking to %d\n",
+ intel_gpu_freq(rps, val));
+
+   rps->max_freq_softlimit = val;
+
+   val = clamp_t(int, rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
+
+   /*
+* We still need *_set_rps to process the new max_delay and
+* update the interrupt limits and 

Re: [Intel-gfx] [PATCH 08/15] drm/i915/guc/slpc: Add methods to set min/max frequency

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:24 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add param set h2g helpers to set the min and max frequencies


s/h2g/H2G


for use by SLPC.

v2: Address review comments (Michal W)
v3: Check for positive error code (Michal W)

Signed-off-by: Sundaresan Sujaritha 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 89 -
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index f5808d2acbca..63656640189c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -109,6 +109,21 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
return data->header.global_state;
  }
  
+static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+   id,
+   value,
+   };
+   int ret;
+
+   ret = intel_guc_send(guc, request, ARRAY_SIZE(request));
+
+   return ret > 0 ? -EPROTO : ret;
+}
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
@@ -118,7 +133,7 @@ static int guc_action_slpc_query(struct intel_guc *guc, u32 
offset)
  {
u32 request[] = {
INTEL_GUC_ACTION_SLPC_REQUEST,
-   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),


this should be fixed in original patch


offset,
0,
};
@@ -146,6 +161,15 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
return ret;
  }
  
+static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)

+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_set_param(guc, id, value);
+}
+
  static const char *slpc_global_state_to_string(enum slpc_global_state state)
  {
const char *str = NULL;
@@ -251,6 +275,69 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc 
*slpc)
GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
  }
  
+/**

+ * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the max frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret;
+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(>drm,
+   "Set max frequency unslice returned (%pe)\n", 
ERR_PTR(ret));


maybe generic error reporting could be moved to slpc_set_param() ?


+   /* Return standardized err code for sysfs */
+   ret = -EIO;


at this point we don't know if this function is for sysfs only
I would sanitize error in "store" hook if really needed

ssize_t slpc_max_freq_store(... const char *buf, size_t count)
{
...
err = intel_guc_slpc_set_max_freq(slpc, val);
return err ? -EIO : count;


that's the problem, sysfs wrapper will need to check for -EIO and 
-EINVAL, we want the ability to return either.


Thanks,
Vinay.

}


+   }
+   }
+
+   return ret;
+}
+
+/**
+ * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the min unslice
+ * frequency.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(>drm,
+   "Set min frequency for unslice returned 
(%pe)\n", ERR_PTR(ret));
+   /* Return standardized err code for sysfs */
+   ret = -EIO;
+   }
+   }


same here


Re: [PATCH 12/15] drm/i915/guc/slpc: Cache platform frequency limits

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 9:00 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Cache rp0, rp1 and rpn platform limits into SLPC structure
for range checking while setting min/max frequencies.

Also add "soft" limits which keep track of frequency changes
made from userland. These are initially set to platform min
and max.

v2: Address review comments (Michal W)
v3: Formatting (Michal W)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 97 +++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 11 +++
  drivers/gpu/drm/i915/i915_reg.h   |  3 +
  3 files changed, 111 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index c79dba60b2e6..a98cbf274862 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
return err;
}
  
+	slpc->max_freq_softlimit = 0;

+   slpc->min_freq_softlimit = 0;
+
return err;
  }
  
@@ -124,6 +127,18 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

return ret > 0 ? -EPROTO : ret;
  }
  
+static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)

+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+   id,
+   };
+
+   return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
@@ -170,6 +185,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 
id, u32 value)
return guc_action_slpc_set_param(guc, id, value);
  }
  
+static int slpc_unset_param(struct intel_guc_slpc *slpc,

+   u8 id)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_unset_param(guc, id);
+}
+
  static const char *slpc_global_state_to_string(enum slpc_global_state state)
  {
const char *str = NULL;
@@ -406,6 +431,55 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
   GEN6_PMINTRMSK, pm_intrmsk_mbz, 0);
  }
  
+static int intel_guc_slpc_set_softlimits(struct intel_guc_slpc *slpc)


nit: "intel_" prefix not needed for static function


ok.




+{
+   int ret = 0;
+
+   /* Softlimits are initially equivalent to platform limits
+* unless they have deviated from defaults, in which case,
+* we retain the values and set min/max accordingly.
+*/


fix style for multi-line comment


done.




+   if (!slpc->max_freq_softlimit)
+   slpc->max_freq_softlimit = slpc->rp0_freq;
+   else if (slpc->max_freq_softlimit != slpc->rp0_freq)
+   ret = intel_guc_slpc_set_max_freq(slpc,
+   slpc->max_freq_softlimit);
+
+   if (!slpc->min_freq_softlimit)
+   slpc->min_freq_softlimit = slpc->min_freq;
+   else if (slpc->min_freq_softlimit != slpc->min_freq)
+   ret = intel_guc_slpc_set_min_freq(slpc,
+   slpc->min_freq_softlimit);
+
+   return ret;
+}
+
+static void intel_guc_slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool 
ignore)
+{
+   if (ignore) {
+   /* A failure here does not affect the algorithm in a fatal way 
*/


is this comment just for "ignore" case or whole function ? (as you don't
check for errors in "else" case anyway)


moved it above.



+   slpc_set_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+  ignore);
+   slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  slpc->min_freq);
+   } else {
+   slpc_unset_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY);
+   slpc_unset_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ);
+   }
+}
+
+static void intel_guc_slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
+{
+   /* Force slpc to used platform rp0 */


s/slpc/SLPC

ok.



+   slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  slpc->rp0_freq);


hmm, likely indent is wrong, did you run checkpatch.pl ?


Fixed.



+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
@@ -423,6 +497,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  {
struct drm_i915_private *i915 = slpc_to_i915(slpc);
struct slpc_shared_data *data;
+   u32 rp_state_cap;
int ret;
  
  	GEM_BUG_ON(!slpc->vma);

@@ -460,6 +535,28 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)

Re: [PATCH 07/15] drm/i915/guc/slpc: Remove BUG_ON in guc_submission_disable

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 5:20 PM, Matthew Brost wrote:

On Mon, Jul 26, 2021 at 12:07:52PM -0700, Vinay Belgaumkar wrote:

The assumption when it was added was there would be no wakerefs
held. However, if we fail to enable SLPC, we will still be
holding a wakeref.



So this is if intel_guc_slpc_enable() fails, right? Not seeing where the
wakeref is taken. It also seems wrong not to drop the wakeref before
calling intel_guc_submission_disable, hence the GEM_BUG_ON in this
function.

Can you explain this bit more?


I should change the description a little. The BUG_ON assumed GT would not be 
awake, i.e. at shutdown, and there would be 0 GT_PM references. However, 
this slpc_enable is in gt_resume path (gt_init_hw calls uc_init_hw). 
Here, gt_pm_get reference is held, so it will result in BUG_ON when 
submission_disable is called.


Thanks,
Vinay.


Matt


Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 
  1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b6338742a594..48cbd800ca54 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2523,10 +2523,6 @@ void intel_guc_submission_enable(struct intel_guc *guc)
  
  void intel_guc_submission_disable(struct intel_guc *guc)

  {
-   struct intel_gt *gt = guc_to_gt(guc);
-
-   GEM_BUG_ON(gt->awake); /* GT should be parked first */
-
/* Note: By the time we're here, GuC may have already been reset */
  }
  
--

2.25.0



Re: [PATCH 11/15] drm/i915/guc/slpc: Enable ARAT timer interrupt

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:40 AM, Matthew Brost wrote:

On Mon, Jul 26, 2021 at 12:07:56PM -0700, Vinay Belgaumkar wrote:

This interrupt is enabled during RPS initialization, and
now needs to be done by SLPC code. It allows ARAT timer
expiry interrupts to get forwarded to GuC.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 16 
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 ++
  drivers/gpu/drm/i915/gt/uc/intel_uc.c   |  8 
  3 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 995d3d4807a3..c79dba60b2e6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -392,6 +392,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc 
*slpc, u32 *val)
return ret;
  }
  
+void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)

+{
+   u32 pm_intrmsk_mbz = 0;
+
+   /* Allow GuC to receive ARAT timer expiry event.


I've been berated for using comments like this this by other engineers.
I personally don't care at all (nor does checkpatch) but if you want to
avoid the wrath of others I'd change this to what I have below:

/*
  * Allow GuC to receive ARAT timer expiry event.
  * This interrupt register is setup by RPS code
  * when host based Turbo is enabled.
  */

Same goes for comment below of same style.

Either way, patch looks good to me. With that:
Reviewed-by: Matthew Brost 


Fixed.
Thanks,
Vinay.



+* This interrupt register is setup by RPS code
+* when host based Turbo is enabled.
+*/
+   pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
+
+   intel_uncore_rmw(gt->uncore,
+  GEN6_PMINTRMSK, pm_intrmsk_mbz, 0);
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
@@ -439,6 +453,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  	slpc_query_task_state(slpc);
  
+	intel_guc_pm_intrmsk_enable(&i915->gt);

+
/* min and max frequency limits being used by SLPC */
	drm_info(&i915->drm, "SLPC min freq: %u Mhz, max is %u Mhz\n",
slpc_decode_min_freq(slpc),
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index d133c8020c16..f128143cc1d8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -9,6 +9,7 @@
  #include "intel_guc_submission.h"
  #include "intel_guc_slpc_types.h"
  
+struct intel_gt;

  struct drm_printer;
  
  static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc)

@@ -35,5 +36,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, 
u32 val);
  int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
  int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
  int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
+void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
  
  #endif

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index b98c14f8c229..9238bc076605 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -652,6 +652,7 @@ void intel_uc_suspend(struct intel_uc *uc)
  static int __uc_resume(struct intel_uc *uc, bool enable_communication)
  {
	struct intel_guc *guc = &uc->guc;
+   struct intel_gt *gt = guc_to_gt(guc);
int err;
  
  	if (!intel_guc_is_fw_running(guc))

@@ -663,6 +664,13 @@ static int __uc_resume(struct intel_uc *uc, bool 
enable_communication)
if (enable_communication)
guc_enable_communication(guc);
  
+	/* If we are only resuming GuC communication but not reloading

+* GuC, we need to ensure the ARAT timer interrupt is enabled
+* again. In case of GuC reload, it is enabled during SLPC enable.
+*/
+   if (enable_communication && intel_uc_uses_guc_slpc(uc))
+   intel_guc_pm_intrmsk_enable(gt);
+
err = intel_guc_resume(guc);
if (err) {
DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
--
2.25.0



Re: [PATCH 10/15] drm/i915/guc/slpc: Add debugfs for SLPC info

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:37 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

This prints out relevant SLPC info from the SLPC shared structure.

We will send a h2g message which forces SLPC to update the


s/h2g/H2G


ok.



shared data structure with latest information before reading it.

v2: Address review comments (Michal W)
v3: Remove unnecessary tasks from slpc_info (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 22 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 29 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  4 ++-
  3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 72ddfff42f7d..3244e54b1337 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -12,6 +12,7 @@
  #include "gt/uc/intel_guc_ct.h"
  #include "gt/uc/intel_guc_ads.h"
  #include "gt/uc/intel_guc_submission.h"
+#include "gt/uc/intel_guc_slpc.h"
  
  static int guc_info_show(struct seq_file *m, void *data)

  {
@@ -50,11 +51,32 @@ static int guc_registered_contexts_show(struct seq_file *m, 
void *data)
  }
  DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
  
+static int guc_slpc_info_show(struct seq_file *m, void *unused)

+{
+   struct intel_guc *guc = m->private;
+   struct intel_guc_slpc *slpc = &guc->slpc;
+   struct drm_printer p = drm_seq_file_printer(m);
+
+   if (!intel_guc_slpc_is_used(guc))
+   return -ENODEV;
+
+   return intel_guc_slpc_info(slpc, &p);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
+
+static bool intel_eval_slpc_support(void *data)
+{
+   struct intel_guc *guc = (struct intel_guc *)data;
+
+   return intel_guc_slpc_is_used(guc);
+}
+
  void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
  {
static const struct debugfs_gt_file files[] = {
		{ "guc_info", &guc_info_fops, NULL },
		{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
+		{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
};
  
  	if (!intel_guc_is_supported(guc))

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index c653bba3b5eb..995d3d4807a3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -448,6 +448,35 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return 0;
  }
  
+int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p)


nit: intel_guc_slpc_print_info ?


ok.




+{
+   struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915;


use slpc_to_i915()


ok.




+   struct slpc_shared_data *data = slpc->vaddr;
+   struct slpc_task_state_data *slpc_tasks;
+   intel_wakeref_t wakeref;
+   int ret = 0;
+
+   GEM_BUG_ON(!slpc->vma);
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   ret = slpc_query_task_state(slpc);
+
+   if (!ret) {
+   slpc_tasks = >task_state_data;
+
+   drm_printf(p, "\tSLPC state: %s\n", 
slpc_get_state_string(slpc));
+   drm_printf(p, "\tGTPERF task active: %s\n",
+   yesno(slpc_tasks->status & 
SLPC_GTPERF_TASK_ENABLED));
+   drm_printf(p, "\tMax freq: %u MHz\n",
+   slpc_decode_max_freq(slpc));
+   drm_printf(p, "\tMin freq: %u MHz\n",
+   slpc_decode_min_freq(slpc));
+   }
+   }
+
+   return ret;
+}
+
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc)
  {
if (!slpc->vma)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 92d7afd44f07..d133c8020c16 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -9,6 +9,8 @@
  #include "intel_guc_submission.h"
  #include "intel_guc_slpc_types.h"
  
+struct drm_printer;

+
  static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc)
  {
return guc->slpc_supported;
@@ -25,7 +27,6 @@ static inline bool intel_guc_slpc_is_used(struct intel_guc 
*guc)
  }
  
  void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc);

-


this should be fixed in earlier patch

with all that fixed,


Done.
Thanks,
Vinay.


Reviewed-by: Michal Wajdeczko 


  int intel_guc_slpc_init(struct intel_guc_slpc *slpc);
  int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
@@ -33,5 +34,6 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, 
u32 val);
  int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, 

Re: [Intel-gfx] [PATCH 09/15] drm/i915/guc/slpc: Add get max/min freq hooks

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:32 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add helpers to read the min/max frequency being used
by SLPC. This is done by sending an H2G command which forces
SLPC to update the shared data struct which can then be
read. These helpers will be used in a sysfs patch later
on.

v2: Address review comments (Michal W)
v3: Return err in case of query failure (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 54 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 63656640189c..c653bba3b5eb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -306,6 +306,33 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc 
*slpc, u32 val)
return ret;
  }
  
+/**

+ * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: pointer to val which will hold max frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to read the max frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val)
+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret = 0;
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   /* Force GuC to update task data */
+   ret = slpc_query_task_state(slpc);
+
+   if (!ret)
+   *val = slpc_decode_max_freq(slpc);
+   }
+
+   return ret;
+}
+
  /**
   * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
   * @slpc: pointer to intel_guc_slpc.
@@ -338,6 +365,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
return ret;
  }
  
+/**

+ * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: pointer to val which will hold min frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to read the min frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
+{
+   intel_wakeref_t wakeref;
+   struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915;


use slpc_to_i915() and in this order:

struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;

with that fixed,

Reviewed-by: Michal Wajdeczko 


done.
Thanks,
vinay.



+   int ret = 0;
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   /* Force GuC to update task data */
+   ret = slpc_query_task_state(slpc);
+
+   if (!ret)
+   *val = slpc_decode_min_freq(slpc);
+   }
+
+   return ret;
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index e594510497ec..92d7afd44f07 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -31,5 +31,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
  int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
  int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
+int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
  
  #endif




Re: [PATCH 03/15] drm/i915/guc/slpc: Gate Host RPS when SLPC is enabled

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 3:44 PM, Matthew Brost wrote:

On Mon, Jul 26, 2021 at 12:07:48PM -0700, Vinay Belgaumkar wrote:

Also ensure uc_init is called before we initialize RPS so that we
can check for SLPC support. We do not need to enable up/down
interrupts when SLPC is enabled. However, we still need the ARAT
interrupt, which will be enabled separately later.



Do we not need a check for rps_uses_slpc in intel_rps_enable? I guessing
there is a reason why we don't but can't seem to figure that out.


Yeah, it's due to this check in there -
if (rps->max_freq <= rps->min_freq)
/* leave disabled, no room for dynamic reclocking */;

With slpc, rps->max_freq and rps->min freq remain uninitialized, so the 
if condition just falls through and returns with this-

if (!enabled)
return;

Thanks,
Vinay.



Matt


Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/intel_gt.c  |  2 +-
  drivers/gpu/drm/i915/gt/intel_rps.c | 20 
  2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a64aa43f7cd9..04dd69bcf6cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -41,8 +41,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct 
drm_i915_private *i915)
intel_gt_init_timelines(gt);
intel_gt_pm_init_early(gt);
  
-	intel_rps_init_early(>rps);

intel_uc_init_early(>uc);
+   intel_rps_init_early(>rps);
  }
  
  int intel_gt_probe_lmem(struct intel_gt *gt)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 0c8e7f2b06f0..e858eeb2c59d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps 
*rps)
return rps_to_gt(rps)->uncore;
  }
  
+static bool rps_uses_slpc(struct intel_rps *rps)

+{
+   struct intel_gt *gt = rps_to_gt(rps);
+
+   return intel_uc_uses_guc_slpc(&gt->uc);
+}
+
  static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
  {
return mask & ~rps->pm_intrmsk_mbz;
@@ -167,6 +174,8 @@ static void rps_enable_interrupts(struct intel_rps *rps)
  {
struct intel_gt *gt = rps_to_gt(rps);
  
+	GEM_BUG_ON(rps_uses_slpc(rps));

+
GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
  
@@ -771,6 +780,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)

struct drm_i915_private *i915 = rps_to_i915(rps);
u32 swreq;
  
+	GEM_BUG_ON(rps_uses_slpc(rps));

+
if (GRAPHICS_VER(i915) >= 9)
swreq = GEN9_FREQUENCY(val);
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
@@ -861,6 +872,9 @@ void intel_rps_park(struct intel_rps *rps)
  {
int adj;
  
+	if (!intel_rps_is_enabled(rps))

+   return;
+
GEM_BUG_ON(atomic_read(>num_waiters));
  
  	if (!intel_rps_clear_active(rps))

@@ -1829,6 +1843,9 @@ void intel_rps_init(struct intel_rps *rps)
  {
struct drm_i915_private *i915 = rps_to_i915(rps);
  
+	if (rps_uses_slpc(rps))

+   return;
+
if (IS_CHERRYVIEW(i915))
chv_rps_init(rps);
else if (IS_VALLEYVIEW(i915))
@@ -1885,6 +1902,9 @@ void intel_rps_init(struct intel_rps *rps)
  
  void intel_rps_sanitize(struct intel_rps *rps)

  {
+   if (rps_uses_slpc(rps))
+   return;
+
if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
rps_disable_interrupts(rps);
  }
--
2.25.0



Re: [Intel-gfx] [PATCH 08/15] drm/i915/guc/slpc: Add methods to set min/max frequency

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:24 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add param set h2g helpers to set the min and max frequencies


s/h2g/H2G


for use by SLPC.

v2: Address review comments (Michal W)
v3: Check for positive error code (Michal W)

Signed-off-by: Sundaresan Sujaritha 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 89 -
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index f5808d2acbca..63656640189c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -109,6 +109,21 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
return data->header.global_state;
  }
  
+static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+   id,
+   value,
+   };
+   int ret;
+
+   ret = intel_guc_send(guc, request, ARRAY_SIZE(request));
+
+   return ret > 0 ? -EPROTO : ret;
+}
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
@@ -118,7 +133,7 @@ static int guc_action_slpc_query(struct intel_guc *guc, u32 
offset)
  {
u32 request[] = {
INTEL_GUC_ACTION_SLPC_REQUEST,
-   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),


this should be fixed in original patch


ok.




offset,
0,
};
@@ -146,6 +161,15 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
return ret;
  }
  
+static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)

+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_set_param(guc, id, value);
+}
+
  static const char *slpc_global_state_to_string(enum slpc_global_state state)
  {
const char *str = NULL;
@@ -251,6 +275,69 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc 
*slpc)
GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
  }
  
+/**

+ * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the max frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+   int ret;
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(&i915->drm,
+   "Set max frequency unslice returned (%pe)\n", ERR_PTR(ret));


maybe generic error reporting could be moved to slpc_set_param() ?


+   /* Return standardized err code for sysfs */
+   ret = -EIO;


at this point we don't know if this function is for sysfs only
I would sanitize error in "store" hook if really needed


ok.



ssize_t slpc_max_freq_store(... const char *buf, size_t count)
{
...
err = intel_guc_slpc_set_max_freq(slpc, val);
return err ? -EIO : count;
}


+   }
+   }
+
+   return ret;
+}
+
+/**
+ * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the min unslice
+ * frequency.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(&i915->drm,
+   "Set min frequency for unslice returned (%pe)\n", ERR_PTR(ret));
+   /* Return standardized err code for sysfs */
+   ret = -EIO;
+   }
+   }


same here


Fixed.
Thanks,
Vinay.


Michal


+
+   return ret;
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: 

Re: [PATCH 14/15] drm/i915/guc/slpc: Add SLPC selftest

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 12:16 PM, Matthew Brost wrote:

On Mon, Jul 26, 2021 at 12:07:59PM -0700, Vinay Belgaumkar wrote:

Tests that exercise the SLPC get/set frequency interfaces.

Clamp_max will set max frequency to multiple levels and check
that SLPC requests frequency lower than or equal to it.

Clamp_min will set min frequency to different levels and check
if SLPC requests are higher or equal to those levels.

v2: Address review comments (Michal W)
v3: Checkpatch() corrections

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c   |   1 +
  drivers/gpu/drm/i915/gt/selftest_slpc.c   | 311 ++
  drivers/gpu/drm/i915/gt/selftest_slpc.h   |  12 +
  .../drm/i915/selftests/i915_live_selftests.h  |   1 +
  4 files changed, 325 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.c
  create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.h

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 48d4147165a9..6237332835fe 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2318,4 +2318,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
  
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

  #include "selftest_rps.c"
+#include "selftest_slpc.c"
  #endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c 
b/drivers/gpu/drm/i915/gt/selftest_slpc.c
new file mode 100644
index ..5018f686686f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "selftest_slpc.h"
+
+#define NUM_STEPS 5
+#define H2G_DELAY 5
+#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 1)
+
+static int set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+   int ret;
+
+   ret = intel_guc_slpc_set_min_freq(slpc, freq);
+   if (ret)
+   pr_err("Could not set min frequency to [%u]\n", freq);
+   else /* Delay to ensure h2g completes */
+   delay_for_h2g();
+
+   return ret;
+}
+
+static int set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+   int ret;
+
+   ret = intel_guc_slpc_set_max_freq(slpc, freq);
+   if (ret)
+   pr_err("Could not set maximum frequency [%u]\n",
+   freq);
+   else /* Delay to ensure h2g completes */
+   delay_for_h2g();
+
+   return ret;
+}
+
+int live_slpc_clamp_min(void *arg)
+{
+   struct drm_i915_private *i915 = arg;
+   struct intel_gt *gt = &i915->gt;
+   struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+   struct intel_rps *rps = &gt->rps;
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   struct igt_spinner spin;
+   u32 slpc_min_freq, slpc_max_freq;
+   int err = 0;
+
+   if (!intel_uc_uses_guc_slpc(&gt->uc))
+   return 0;
+
+   if (igt_spinner_init(&spin, gt))
+   return -ENOMEM;
+
+   if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) {
+   pr_err("Could not get SLPC max freq\n");
+   return -EIO;
+   }
+
+   if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) {
+   pr_err("Could not get SLPC min freq\n");
+   return -EIO;
+   }
+
+   if (slpc_min_freq == slpc_max_freq) {
+   pr_err("Min/Max are fused to the same value\n");
+   return -EINVAL;
+   }
+
+   intel_gt_pm_wait_for_idle(gt);
+   intel_gt_pm_get(gt);
+   for_each_engine(engine, gt, id) {
+   struct i915_request *rq;
+   u32 step, min_freq, req_freq;
+   u32 act_freq, max_act_freq;
+
+   if (!intel_engine_can_store_dword(engine))
+   continue;
+
+   /* Go from min to max in 5 steps */
+   step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS;
+   max_act_freq = slpc_min_freq;
+   for (min_freq = slpc_min_freq; min_freq < slpc_max_freq;
+   min_freq += step) {
+   err = set_min_freq(slpc, min_freq);
+   if (err)
+   break;
+
+   st_engine_heartbeat_disable(engine);
+
+   rq = igt_spinner_create_request(&spin,
+   engine->kernel_context,
+   MI_NOOP);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   st_engine_heartbeat_enable(engine);
+   break;
+   }
+
+   i915_request_add(rq);
+
+   if (!igt_wait_for_spinner(&spin, rq)) {
+   pr_err("%s: Spinner did not start\n",
+   engine->name);
+   igt_spinner_end(&spin);
+   

Re: [Intel-gfx] [PATCH 06/15] drm/i915/guc/slpc: Enable SLPC and add related H2G events

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 1:19 PM, Michal Wajdeczko wrote:



On 27.07.2021 22:00, Belgaumkar, Vinay wrote:



On 7/27/2021 8:12 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add methods for interacting with GuC for enabling SLPC. Enable
SLPC after GuC submission has been established. GuC load will
fail if SLPC cannot be successfully initialized. Add various
helper methods to set/unset the parameters for SLPC. They can
be set using H2G calls or directly setting bits in the shared
data structure.

v2: Address several review comments, add new helpers for
decoding the SLPC min/max frequencies. Use masks instead of hardcoded
constants. (Michal W)

v3: Split global_state_to_string function, and check for positive
non-zero return value from intel_guc_send() (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 237 ++
   .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |   2 +
   drivers/gpu/drm/i915/gt/uc/intel_uc.c |   8 +
   3 files changed, 247 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index bae4e33db0f8..f5808d2acbca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct
intel_guc_slpc *slpc)
   guc->slpc_selected = __guc_slpc_selected(guc);
   }
   +static void slpc_mem_set_param(struct slpc_shared_data *data,
+    u32 id, u32 value)
+{
+    GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS);
+    /*
+ * When the flag bit is set, corresponding value will be read
+ * and applied by slpc.


s/slpc/SLPC

ok.




+ */
+    data->override_params.bits[id >> 5] |= (1 << (id % 32));
+    data->override_params.values[id] = value;
+}
+
+static void slpc_mem_set_enabled(struct slpc_shared_data *data,
+    u8 enable_id, u8 disable_id)
+{
+    /*
+ * Enabling a param involves setting the enable_id
+ * to 1 and disable_id to 0.
+ */
+    slpc_mem_set_param(data, enable_id, 1);
+    slpc_mem_set_param(data, disable_id, 0);
+}
+
+static void slpc_mem_set_disabled(struct slpc_shared_data *data,
+    u8 enable_id, u8 disable_id)
+{
+    /*
+ * Disabling a param involves setting the enable_id
+ * to 0 and disable_id to 1.
+ */
+    slpc_mem_set_param(data, disable_id, 1);
+    slpc_mem_set_param(data, enable_id, 0);
+}
+
   static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
   {
   struct intel_guc *guc = slpc_to_guc(slpc);
@@ -63,6 +97,129 @@ static int slpc_shared_data_init(struct
intel_guc_slpc *slpc)
   return err;
   }
   +static u32 slpc_get_state(struct intel_guc_slpc *slpc)
+{
+    struct slpc_shared_data *data;
+
+    GEM_BUG_ON(!slpc->vma);
+
+    drm_clflush_virt_range(slpc->vaddr, sizeof(u32));
+    data = slpc->vaddr;
+
+    return data->header.global_state;
+}
+
+static bool slpc_is_running(struct intel_guc_slpc *slpc)
+{
+    return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);


extra ( ) not needed


ok.




+}
+
+static int guc_action_slpc_query(struct intel_guc *guc, u32 offset)
+{
+    u32 request[] = {
+    INTEL_GUC_ACTION_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+    offset,
+    0,
+    };
+    int ret;
+
+    ret = intel_guc_send(guc, request, ARRAY_SIZE(request));
+
+    return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_query_task_state(struct intel_guc_slpc *slpc)
+{
+    struct intel_guc *guc = slpc_to_guc(slpc);
+    struct drm_i915_private *i915 = slpc_to_i915(slpc);
+    u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma);


just "offset" ? or maybe pass directly in call below ?


ok.




+    int ret;
+
+    ret = guc_action_slpc_query(guc, shared_data_gtt_offset);
+    if (ret)
+    drm_err(>drm, "Query task state data returned (%pe)\n",


"Failed to query task state (%pe)\n" ?


ok.



+    ERR_PTR(ret));
+
+    drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
+
+    return ret;
+}
+
+static const char *slpc_global_state_to_string(enum
slpc_global_state state)
+{
+    const char *str = NULL;
+
+    switch (state) {
+    case SLPC_GLOBAL_STATE_NOT_RUNNING:
+    str = "not running";
+    break;
+    case SLPC_GLOBAL_STATE_INITIALIZING:
+    str = "initializing";
+    break;
+    case SLPC_GLOBAL_STATE_RESETTING:
+    str = "resetting";
+    break;
+    case SLPC_GLOBAL_STATE_RUNNING:
+    str = "running";
+    break;
+    case SLPC_GLOBAL_STATE_SHUTTING_DOWN:
+    str = "shutting down";
+    break;
+    case SLPC_GLOBAL_STATE_ERROR:
+    str = "error";
+    break;
+    default:
+    str = "unknown";


nit: yo

Re: [Intel-gfx] [PATCH 06/15] drm/i915/guc/slpc: Enable SLPC and add related H2G events

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:12 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add methods for interacting with GuC for enabling SLPC. Enable
SLPC after GuC submission has been established. GuC load will
fail if SLPC cannot be successfully initialized. Add various
helper methods to set/unset the parameters for SLPC. They can
be set using H2G calls or directly setting bits in the shared
data structure.

v2: Address several review comments, add new helpers for
decoding the SLPC min/max frequencies. Use masks instead of hardcoded
constants. (Michal W)

v3: Split global_state_to_string function, and check for positive
non-zero return value from intel_guc_send() (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 237 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |   2 +
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |   8 +
  3 files changed, 247 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index bae4e33db0f8..f5808d2acbca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc)
guc->slpc_selected = __guc_slpc_selected(guc);
  }
  
+static void slpc_mem_set_param(struct slpc_shared_data *data,

+   u32 id, u32 value)
+{
+   GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS);
+   /*
+* When the flag bit is set, corresponding value will be read
+* and applied by slpc.


s/slpc/SLPC

ok.




+*/
+   data->override_params.bits[id >> 5] |= (1 << (id % 32));
+   data->override_params.values[id] = value;
+}
+
+static void slpc_mem_set_enabled(struct slpc_shared_data *data,
+   u8 enable_id, u8 disable_id)
+{
+   /*
+* Enabling a param involves setting the enable_id
+* to 1 and disable_id to 0.
+*/
+   slpc_mem_set_param(data, enable_id, 1);
+   slpc_mem_set_param(data, disable_id, 0);
+}
+
+static void slpc_mem_set_disabled(struct slpc_shared_data *data,
+   u8 enable_id, u8 disable_id)
+{
+   /*
+* Disabling a param involves setting the enable_id
+* to 0 and disable_id to 1.
+*/
+   slpc_mem_set_param(data, disable_id, 1);
+   slpc_mem_set_param(data, enable_id, 0);
+}
+
  static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
  {
struct intel_guc *guc = slpc_to_guc(slpc);
@@ -63,6 +97,129 @@ static int slpc_shared_data_init(struct intel_guc_slpc 
*slpc)
return err;
  }
  
+static u32 slpc_get_state(struct intel_guc_slpc *slpc)

+{
+   struct slpc_shared_data *data;
+
+   GEM_BUG_ON(!slpc->vma);
+
+   drm_clflush_virt_range(slpc->vaddr, sizeof(u32));
+   data = slpc->vaddr;
+
+   return data->header.global_state;
+}
+
+static bool slpc_is_running(struct intel_guc_slpc *slpc)
+{
+   return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);


extra ( ) not needed


ok.




+}
+
+static int guc_action_slpc_query(struct intel_guc *guc, u32 offset)
+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+   offset,
+   0,
+   };
+   int ret;
+
+   ret = intel_guc_send(guc, request, ARRAY_SIZE(request));
+
+   return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_query_task_state(struct intel_guc_slpc *slpc)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma);


just "offset" ? or maybe pass directly in call below ?


ok.




+   int ret;
+
+   ret = guc_action_slpc_query(guc, shared_data_gtt_offset);
+   if (ret)
+   drm_err(>drm, "Query task state data returned (%pe)\n",


"Failed to query task state (%pe)\n" ?


ok.



+   ERR_PTR(ret));
+
+   drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
+
+   return ret;
+}
+
+static const char *slpc_global_state_to_string(enum slpc_global_state state)
+{
+   const char *str = NULL;
+
+   switch (state) {
+   case SLPC_GLOBAL_STATE_NOT_RUNNING:
+   str = "not running";
+   break;
+   case SLPC_GLOBAL_STATE_INITIALIZING:
+   str = "initializing";
+   break;
+   case SLPC_GLOBAL_STATE_RESETTING:
+   str = "resetting";
+   break;
+   case SLPC_GLOBAL_STATE_RUNNING:
+   str = "running";
+   break;
+   case SLPC_GLOBAL_STATE_SHUTTING_DOWN:
+   str = "shutting down";
+   break;
+   case SLPC_GLOBAL_STATE_ERROR:
+   str = "error";
+

Re: [PATCH 04/15] drm/i915/guc/slpc: Adding SLPC communication interfaces

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 6:59 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add constants and params that are needed to configure SLPC.

v2: Add a new abi header for SLPC. Replace bitfields with
genmasks. Address other comments from Michal W.

v3: Add slpc H2G format in abi, other review comments (Michal W)

v4: Update status bits according to latest spec

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 -
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 235 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|   3 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   7 +
  4 files changed, 245 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index d832c8f11c11..ca538e5de940 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -135,7 +135,6 @@ enum intel_guc_action {
INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
-   INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
new file mode 100644
index ..70b300d4a536
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include 
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  
++--+--+
+ *  | CL | Bytes| Description  
|
+ *  
++==+==+
+ *  | 1  | 0-3  | SHARED DATA SIZE 
|
+ *  |
+--+--+
+ *  || 4-7  | GLOBAL STATE 
|
+ *  |
+--+--+
+ *  || 8-11 | DISPLAY DATA ADDRESS 
|
+ *  |
+--+--+
+ *  || 12:63| PADDING  
|
+ *  
++--+--+
+ *  || 0:63 | PADDING(PLATFORM INFO)   
|
+ *  
++--+--+
+ *  | 3  | 0-3  | TASK STATE DATA  
|
+ *  +
+--+--+
+ *  || 4:63 | PADDING  
|
+ *  
++--+--+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS   
|
+ *  
++--+--+
+ *  ||  | PADDING + EXTRA RESERVED PAGE
|
+ *  
++--+--+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS   256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+   (SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES   4096
+#define SLPC_CACHELINE_SIZE_BYTES  64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER  SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO   
SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE  SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE  SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size 

Re: [PATCH 02/15] drm/i915/guc/slpc: Initial definitions for SLPC

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 6:43 AM, Michal Wajdeczko wrote:



On 26.07.2021 21:07, Vinay Belgaumkar wrote:

Add macros to check for SLPC support. This feature is currently supported
for Gen12+ and enabled whenever GuC submission is enabled/selected.

Include templates for SLPC init/fini and enable.

v2: Move SLPC helper functions to intel_guc_slpc.c/.h. Define basic
template for SLPC structure in intel_guc_slpc_types.h. Fix copyright (Michal W)

v3: Review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
Signed-off-by: Daniele Ceraolo Spurio 

drm/i915/guc/slpc: Lay out slpc init/enable/fini

Declare init/fini and enable function templates.

v2: Rebase

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/Makefile |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  4 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 45 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   | 33 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 12 +
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  6 ++-
  drivers/gpu/drm/i915/gt/uc/intel_uc.h |  2 +
  8 files changed, 103 insertions(+), 2 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index ab7679957623..d8eac4468df9 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \
  gt/uc/intel_guc_fw.o \
  gt/uc/intel_guc_log.o \
  gt/uc/intel_guc_log_debugfs.o \
+ gt/uc/intel_guc_slpc.o \
  gt/uc/intel_guc_submission.o \
  gt/uc/intel_huc.o \
  gt/uc/intel_huc_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 979128e28372..39bc3c16057b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -7,6 +7,7 @@
  #include "gt/intel_gt_irq.h"
  #include "gt/intel_gt_pm_irq.h"
  #include "intel_guc.h"
+#include "intel_guc_slpc.h"
  #include "intel_guc_ads.h"
  #include "intel_guc_submission.h"
  #include "i915_drv.h"
@@ -157,6 +158,7 @@ void intel_guc_init_early(struct intel_guc *guc)
intel_guc_ct_init_early(>ct);
intel_guc_log_init_early(>log);
intel_guc_submission_init_early(guc);
+   intel_guc_slpc_init_early(>slpc);
  
  	mutex_init(>send_mutex);

spin_lock_init(>irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index a9547069ee7e..15ad2eaee473 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -15,6 +15,7 @@
  #include "intel_guc_ct.h"
  #include "intel_guc_log.h"
  #include "intel_guc_reg.h"
+#include "intel_guc_slpc_types.h"
  #include "intel_uc_fw.h"
  #include "i915_utils.h"
  #include "i915_vma.h"
@@ -30,6 +31,7 @@ struct intel_guc {
struct intel_uc_fw fw;
struct intel_guc_log log;
struct intel_guc_ct ct;
+   struct intel_guc_slpc slpc;
  
  	/* Global engine used to submit requests to GuC */

struct i915_sched_engine *sched_engine;
@@ -57,6 +59,8 @@ struct intel_guc {
  
  	bool submission_supported;

bool submission_selected;
+   bool slpc_supported;
+   bool slpc_selected;


(I know that you were following existing code, but we might do better
and since you have to resend it anyway without patch 1/15 ...)

as we have here:

+   struct intel_guc_slpc slpc;

then maybe both supported/selected shall be moved there as:

  struct intel_guc_slpc {
+   bool supported;
+   bool selected;
  };

so the struct wont be empty any more, with that fixed:

Reviewed-by: Michal Wajdeczko 


Ok, done,

Thanks for the review,
Vinay.


  
  	struct i915_vma *ads_vma;

struct __guc_ads_blob *ads_blob;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
new file mode 100644
index ..7275100ef8f8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_guc_slpc.h"
+#include "gt/intel_gt.h"
+
+static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
+{
+   return container_of(slpc, struct intel_guc, slpc);
+}
+
+static bool __detect_slpc_supported(struct intel_guc *guc)
+{
+   /* GuC SLPC is unavailable for pre-Gen12 */
+   return guc->submission_supported &&
+   GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+}
+
+static bool __guc_slpc_selected(struct intel_guc *guc)
+{
+   if 

Re: [PATCH 15/15] drm/i915/guc/rc: Setup and enable GUCRC feature

2021-07-27 Thread Belgaumkar, Vinay




On 7/27/2021 8:37 AM, Matt Roper wrote:

On Mon, Jul 26, 2021 at 12:08:00PM -0700, Vinay Belgaumkar wrote:

This feature hands over the control of HW RC6 to the GuC.
GuC decides when to put HW into RC6 based on its internal
busyness algorithms.

GUCRC needs GuC submission to be enabled, and only
supported on Gen12+ for now.

When GUCRC is enabled, do not set HW RC6. Use a H2G message
to tell GuC to enable GUCRC. When disabling RC6, tell GuC to
revert RC6 control back to KMD.

v2: Address comments (Michal W)

Reviewed-by: Michal Wajdeczko 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/Makefile |  1 +
  drivers/gpu/drm/i915/gt/intel_rc6.c   | 22 +++--
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  6 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 80 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h | 31 +++
  drivers/gpu/drm/i915/gt/uc/intel_uc.h |  2 +
  8 files changed, 140 insertions(+), 5 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d8eac4468df9..3fc17f20d88e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \
  gt/uc/intel_guc_fw.o \
  gt/uc/intel_guc_log.o \
  gt/uc/intel_guc_log_debugfs.o \
+ gt/uc/intel_guc_rc.o \
  gt/uc/intel_guc_slpc.o \
  gt/uc/intel_guc_submission.o \
  gt/uc/intel_huc.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c 
b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 259d7eb4e165..299fcf10b04b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -98,11 +98,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);


Do steps 2b and 2c above this still apply to gucrc?  Are those still
controlling the behavior of gucrc or does the GuC firmware just
overwrite them with its own values?  If they're still impacting the
behavior when gucrc is enabled, is there any updated guidance on how the
values should be set?  It seems that there isn't any guidance in the
bspec for the last several platforms, so we've pretty much been re-using
old values without knowing if there's additional adjustment that should
be done for the newer platforms.

If the tuning values the driver sets get ignored/overwritten during GuC
operation, maybe we should add a new gucrc_rc6_enable() that gets used
instead of gen11_rc6_enable() and drops the unnecessary steps to help
clarify what's truly important?


Yeah, 2b does get overwritten by guc, but we still need 2c.




  
-	/* 3a: Enable RC6 */

-   rc6->ctl_enable =
-   GEN6_RC_CTL_HW_ENABLE |
-   GEN6_RC_CTL_RC6_ENABLE |
-   GEN6_RC_CTL_EI_MODE(1);
+   /* 3a: Enable RC6
+*
+* With GUCRC, we do not enable bit 31 of RC_CTL,
+* thus allowing GuC to control RC6 entry/exit fully instead.
+* We will not set the HW ENABLE and EI bits
+*/
+   if (!intel_guc_rc_enable(>uc.guc))
+   rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
+   else
+   rc6->ctl_enable =
+   GEN6_RC_CTL_HW_ENABLE |
+   GEN6_RC_CTL_RC6_ENABLE |
+   GEN6_RC_CTL_EI_MODE(1);
  
  	pg_enable =

GEN9_RENDER_PG_ENABLE |


We should probably clarify in the commit message that gucrc doesn't
cover powergating and leaves that under driver control.  Maybe we should
even pull this out into its own function rather than leaving it in the
"rc6 enable" function since it really is its own thing?


I have a note in the summary patch about this, will pull it into this 
patch header as well.


There is already a separate effort underway from Suja to decouple RC6 
and coarse power gate enabling. Might become more streamlined after that.


For now, I can have an if check around 2b so that there is more clarity?

Thanks,
Vinay.



Matt


@@ -513,6 +521,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
  {
struct drm_i915_private *i915 = rc6_to_i915(rc6);
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+   struct intel_gt *gt = rc6_to_gt(rc6);
+
+   /* Take control of RC6 back from GuC */
+   intel_guc_rc_disable(>uc.guc);
  
  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

if (GRAPHICS_VER(i915) >= 9)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index ca538e5de940..8ff58aff 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ 

Re: [PATCH 14/14] drm/i915/guc/rc: Setup and enable GUCRC feature

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 11:21 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

This feature hands over the control of HW RC6 to the GuC.
GuC decides when to put HW into RC6 based on its internal
busyness algorithms.

GUCRC needs GuC submission to be enabled, and only
supported on Gen12+ for now.

When GUCRC is enabled, do not set HW RC6. Use a H2G message
to tell GuC to enable GUCRC. When disabling RC6, tell GuC to
revert RC6 control back to KMD.

v2: Address comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/Makefile |  1 +
  drivers/gpu/drm/i915/gt/intel_rc6.c   | 22 +++--
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  6 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 81 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h | 31 +++
  drivers/gpu/drm/i915/gt/uc/intel_uc.h |  2 +
  8 files changed, 141 insertions(+), 5 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d8eac4468df9..3fc17f20d88e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \
  gt/uc/intel_guc_fw.o \
  gt/uc/intel_guc_log.o \
  gt/uc/intel_guc_log_debugfs.o \
+ gt/uc/intel_guc_rc.o \
  gt/uc/intel_guc_slpc.o \
  gt/uc/intel_guc_submission.o \
  gt/uc/intel_huc.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c 
b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 259d7eb4e165..299fcf10b04b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -98,11 +98,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
  
-	/* 3a: Enable RC6 */

-   rc6->ctl_enable =
-   GEN6_RC_CTL_HW_ENABLE |
-   GEN6_RC_CTL_RC6_ENABLE |
-   GEN6_RC_CTL_EI_MODE(1);
+   /* 3a: Enable RC6
+*
+* With GUCRC, we do not enable bit 31 of RC_CTL,
+* thus allowing GuC to control RC6 entry/exit fully instead.
+* We will not set the HW ENABLE and EI bits
+*/
+   if (!intel_guc_rc_enable(>uc.guc))
+   rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
+   else
+   rc6->ctl_enable =
+   GEN6_RC_CTL_HW_ENABLE |
+   GEN6_RC_CTL_RC6_ENABLE |
+   GEN6_RC_CTL_EI_MODE(1);
  
  	pg_enable =

GEN9_RENDER_PG_ENABLE |
@@ -513,6 +521,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
  {
struct drm_i915_private *i915 = rc6_to_i915(rc6);
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+   struct intel_gt *gt = rc6_to_gt(rc6);
+
+   /* Take control of RC6 back from GuC */
+   intel_guc_rc_disable(>uc.guc);
  
  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

if (GRAPHICS_VER(i915) >= 9)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index d832c8f11c11..5f1c82f35d97 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -136,6 +136,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
+   INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
@@ -146,6 +147,11 @@ enum intel_guc_action {
INTEL_GUC_ACTION_LIMIT
  };
  
+enum intel_guc_rc_options {

+   INTEL_GUCRC_HOST_CONTROL,
+   INTEL_GUCRC_FIRMWARE_CONTROL,
+};
+
  enum intel_guc_preempt_options {
INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 686cb978662d..e474f554b17a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -159,6 +159,7 @@ void intel_guc_init_early(struct intel_guc *guc)
intel_guc_log_init_early(>log);
intel_guc_submission_init_early(guc);
intel_guc_slpc_init_early(>slpc);
+   intel_guc_rc_init_early(guc);
  
  	mutex_init(>send_mutex);

spin_lock_init(>irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 8cecfad9d7b1..dcac31098687 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ 

Re: [PATCH 12/14] drm/i915/guc/slpc: Sysfs hooks for SLPC

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 11:13 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Update the get/set min/max freq hooks to work for
SLPC case as well. Consolidate helpers for requested/min/max
frequency get/set to intel_rps where the proper action can
be taken depending on whether slpc is enabled.


s/slpc/SLPC


ok.





v2: Add wrappers for getting rp0/1/n frequencies, update
softlimits in set min/max slpc functions. Also check for
boundary conditions before setting them.

v3: Address review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Tvrtko Ursulin 
Signed-off-by: Sujaritha Sundaresan 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 165 
  drivers/gpu/drm/i915/gt/intel_rps.h |  11 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c |  14 ++
  drivers/gpu/drm/i915/i915_pmu.c |   2 +-
  drivers/gpu/drm/i915/i915_reg.h |   2 +
  drivers/gpu/drm/i915/i915_sysfs.c   |  77 ++---
  6 files changed, 207 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index e858eeb2c59d..48d4147165a9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps 
*rps)
return rps_to_gt(rps)->uncore;
  }
  
+static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)

+{
+   struct intel_gt *gt = rps_to_gt(rps);
+
+   return >uc.guc.slpc;
+}
+
  static bool rps_uses_slpc(struct intel_rps *rps)
  {
struct intel_gt *gt = rps_to_gt(rps);
@@ -1960,6 +1967,164 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
  }
  
+u32 intel_rps_read_punit_req(struct intel_rps *rps)

+{
+   struct intel_uncore *uncore = rps_to_uncore(rps);
+
+   return intel_uncore_read(uncore, GEN6_RPNSWREQ);
+}
+
+u32 intel_rps_get_req(struct intel_rps *rps, u32 pureq)
+{
+   u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
+
+   return req;
+}
+
+u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
+{
+   u32 freq = intel_rps_get_req(rps, intel_rps_read_punit_req(rps));
+
+   return intel_gpu_freq(rps, freq);
+}
+
+u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
+{
+   if (rps_uses_slpc(rps))
+   return intel_rps_read_punit_req_frequency(rps);
+   else
+   return intel_gpu_freq(rps, rps->cur_freq);
+}
+
+u32 intel_rps_get_max_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->max_freq_softlimit;
+   else
+   return intel_gpu_freq(rps, rps->max_freq_softlimit);
+}
+
+u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->rp0_freq;
+   else
+   return intel_gpu_freq(rps, rps->rp0_freq);
+}
+
+u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->rp1_freq;
+   else
+   return intel_gpu_freq(rps, rps->rp1_freq);
+}
+
+u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->min_freq;
+   else
+   return intel_gpu_freq(rps, rps->min_freq);
+}
+
+int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+   int ret = 0;
+
+   if (rps_uses_slpc(rps))
+   return intel_guc_slpc_set_max_freq(slpc, val);
+
+   mutex_lock(>lock);
+
+   val = intel_freq_opcode(rps, val);
+   if (val < rps->min_freq ||
+   val > rps->max_freq ||
+   val < rps->min_freq_softlimit) {
+   ret = -EINVAL;
+   goto unlock;
+   }
+
+   if (val > rps->rp0_freq)
+   drm_dbg(>drm, "User requested overclocking to %d\n",
+ intel_gpu_freq(rps, val));
+
+   rps->max_freq_softlimit = val;
+
+   val = clamp_t(int, rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
+
+   /*
+* We still need *_set_rps to process the new max_delay and
+* update the interrupt limits and PMINTRMSK even though
+* frequency request may be unchanged.
+*/
+   intel_rps_set(rps, val);
+
+unlock:
+   mutex_unlock(>lock);
+
+   return ret;
+}
+
+u32 intel_rps_get_min_frequency(struct intel_rps *rps)
+{
+   struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+
+   if (rps_uses_slpc(rps))
+   return slpc->min_freq_softlimit;
+  

Re: [PATCH 11/14] drm/i915/guc/slpc: Cache platform frequency limits

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 11:09 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Cache rp0, rp1 and rpn platform limits into SLPC structure
for range checking while setting min/max frequencies.

Also add "soft" limits which keep track of frequency changes
made from userland. These are initially set to platform min
and max.

v2: Address review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 98 +++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |  9 ++
  drivers/gpu/drm/i915/i915_reg.h   |  3 +
  3 files changed, 110 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 8796a8929d89..134c57ca10b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -94,6 +94,9 @@ static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
return err;
}
  
+	slpc->max_freq_softlimit = 0;

+   slpc->min_freq_softlimit = 0;
+
return err;
  }
  
@@ -121,6 +124,19 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

return intel_guc_send(guc, request, ARRAY_SIZE(request));
  }
  
+static int guc_action_slpc_unset_param(struct intel_guc *guc,

+   u8 id)
+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+   id,
+   };
+
+   return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
@@ -164,6 +180,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 
id, u32 value)
return guc_action_slpc_set_param(guc, id, value);
  }
  
+static int slpc_unset_param(struct intel_guc_slpc *slpc,

+   u8 id)


likely can fit into one line


done.




+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_unset_param(guc, id);
+}
+
  static const char *slpc_state_string(struct intel_guc_slpc *slpc)
  {
const char *str = NULL;
@@ -388,6 +414,55 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
   GEN6_PMINTRMSK, pm_intrmsk_mbz, 0);
  }
  
+static int intel_guc_slpc_set_softlimits(struct intel_guc_slpc *slpc)

+{
+   int ret = 0;
+
+   /* Softlimits are initially equivalent to platform limits
+* unless they have deviated from defaults, in which case,
+* we retain the values and set min/max accordingly.
+*/
+   if (!slpc->max_freq_softlimit)
+   slpc->max_freq_softlimit = slpc->rp0_freq;
+   else if (slpc->max_freq_softlimit != slpc->rp0_freq)
+   ret = intel_guc_slpc_set_max_freq(slpc,
+   slpc->max_freq_softlimit);
+
+   if (!slpc->min_freq_softlimit)
+   slpc->min_freq_softlimit = slpc->min_freq;
+   else if (slpc->min_freq_softlimit != slpc->min_freq)
+   ret = intel_guc_slpc_set_min_freq(slpc,
+   slpc->min_freq_softlimit);
+
+   return ret;
+}
+
+static void intel_guc_slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool 
ignore)
+{
+   if (ignore) {
+   /* A failure here does not affect the algorithm in a fatal way 
*/
+   slpc_set_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+  ignore);
+   slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  slpc->min_freq);
+   } else {
+   slpc_unset_param(slpc,
+  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY);
+   slpc_unset_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ);


hard to tell from mail client, but likely misalignment
did you run checkpatch.pl ?


Yup, ran it this time.




+   }
+}
+
+static void intel_guc_slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
+{
+   /* Force slpc to used platform rp0 */
+   slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  slpc->rp0_freq);
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
@@ -405,6 +480,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  {
struct drm_i915_private *i915 = slpc_to_i915(slpc);
struct slpc_shared_data *data;
+   u32 rp_state_cap;
int ret;
  
  	GEM_BUG_ON(!slpc->vma);

@@ -442,6 +518,28 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
slpc_decode_min_freq(slpc),
slpc_decode_max_freq(slpc));
  
+	rp_state_cap = intel_uncore_read(i915->gt.uncore, GEN6_RP_STATE_CAP);

Re: [PATCH 09/14] drm/i915/guc/slpc: Add debugfs for SLPC info

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 11:05 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

This prints out relevant SLPC info from the SLPC shared structure.

We will send an H2G message which forces SLPC to update the
shared data structure with the latest information before reading it.

v2: Address review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 23 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 40 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  4 +-
  3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 72ddfff42f7d..46b22187927b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -12,6 +12,7 @@
  #include "gt/uc/intel_guc_ct.h"
  #include "gt/uc/intel_guc_ads.h"
  #include "gt/uc/intel_guc_submission.h"
+#include "gt/uc/intel_guc_slpc.h"
  
  static int guc_info_show(struct seq_file *m, void *data)

  {
@@ -50,11 +51,33 @@ static int guc_registered_contexts_show(struct seq_file *m, 
void *data)
  }
  DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
  
+static int guc_slpc_info_show(struct seq_file *m, void *unused)

+{
+   struct intel_guc *guc = m->private;
+   struct intel_guc_slpc *slpc = >slpc;
+   struct drm_printer p = drm_seq_file_printer(m);
+
+   if (!intel_guc_slpc_is_used(guc))
+   return -ENODEV;
+
+   return intel_guc_slpc_info(slpc, );
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
+
+bool intel_eval_slpc_support(void *data)
+{
+   struct intel_guc *guc;
+
+   guc = (struct intel_guc *)data;


struct intel_guc *guc = (struct intel_guc *)data;


+   return intel_guc_slpc_is_used(guc);
+}
+
  void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
  {
static const struct debugfs_gt_file files[] = {
{ "guc_info", _info_fops, NULL },
{ "guc_registered_contexts", _registered_contexts_fops, 
NULL },
+   { "guc_slpc_info", _slpc_info_fops, 
_eval_slpc_support},
};
  
  	if (!intel_guc_is_supported(guc))

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index c1cf8d46e360..73379985c105 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -430,6 +430,46 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return 0;
  }
  
+int intel_guc_slpc_info(struct intel_guc_slpc *slpc, struct drm_printer *p)

+{
+   struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915;
+   struct slpc_shared_data *data;
+   struct slpc_task_state_data *slpc_tasks;
+   intel_wakeref_t wakeref;
+   int ret = 0;
+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   if (slpc_query_task_state(slpc))
+   return -EIO;


not sure if you can return directly from "with_rpm"


Good point, checking err instead.




+
+   slpc_tasks = >task_state_data;
+
+   drm_printf(p, "SLPC state: %s\n", slpc_state_string(slpc));
+   drm_printf(p, "\tgtperf task active: %s\n",
+   yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ACTIVE));
+   drm_printf(p, "\tdcc task active: %s\n",
+   yesno(slpc_tasks->status & SLPC_DCC_TASK_ACTIVE));
+   drm_printf(p, "\tin dcc: %s\n",
+   yesno(slpc_tasks->status & SLPC_IN_DCC));
+   drm_printf(p, "\tfreq switch active: %s\n",
+   yesno(slpc_tasks->status & SLPC_FREQ_SWITCH_ACTIVE));
+   drm_printf(p, "\tibc enabled: %s\n",
+   yesno(slpc_tasks->status & SLPC_IBC_ENABLED));
+   drm_printf(p, "\tibc active: %s\n",
+   yesno(slpc_tasks->status & SLPC_IBC_ACTIVE));
+   drm_printf(p, "\tpg1 enabled: %s\n",
+   yesno(slpc_tasks->status & SLPC_PG1_ENABLED));
+   drm_printf(p, "\tpg1 active: %s\n",
+   yesno(slpc_tasks->status & SLPC_PG1_ACTIVE));
+   drm_printf(p, "\tmax freq: %dMHz\n",
+   slpc_decode_max_freq(slpc));
+   drm_printf(p, "\tmin freq: %dMHz\n",
+   slpc_decode_min_freq(slpc));


not sure what they are:

DCC ?
IBC ?
PG1 ?


Removed these for now, since we don't use/enable them yet anyways.



and make sure to use %u for unsigned


Ok.
Thanks,
Vinay.



Michal


+   }
+
+   return ret;
+}
+
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc)
  {
if (!slpc->vma)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 

Re: [PATCH 08/14] drm/i915/guc/slpc: Add get max/min freq hooks

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 11:00 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Add helpers to read the min/max frequency being used
by SLPC. This is done by sending an H2G command which forces
SLPC to update the shared data struct, which can then be
read.


add note that functions will be used later


ok.





v2: Address review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 52 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 54 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index b40c39ba4049..c1cf8d46e360 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -290,6 +290,32 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc 
*slpc, u32 val)
return ret;
  }
  
+/**

+ * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: pointer to val which will hold max frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to read the max frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val)
+{
+   intel_wakeref_t wakeref;
+   struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915;
+   int ret = 0;


struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;


+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   /* Force GuC to update task data */
+   slpc_query_task_state(slpc);


what if this call fails ?


saving error in ret.




+
+   *val = slpc_decode_max_freq(slpc);
+   }
+
+   return ret;
+}
+
  /**
   * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
   * @slpc: pointer to intel_guc_slpc.
@@ -322,6 +348,32 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
*slpc, u32 val)
return ret;
  }
  
+/**

+ * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: pointer to val which will hold min frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to read the min frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
+{
+   intel_wakeref_t wakeref;
+   struct drm_i915_private *i915 = guc_to_gt(slpc_to_guc(slpc))->i915;
+   int ret = 0;
+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   /* Force GuC to update task data */
+   slpc_query_task_state(slpc);


same here


Populated ret with return code.

Thanks,
Vinay.


Michal


+
+   *val = slpc_decode_min_freq(slpc);
+   }
+
+   return ret;
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 3a1a7eaafc12..627c71a95777 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -32,5 +32,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
  int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
  int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
+int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
  
  #endif




Re: [PATCH 07/14] drm/i915/guc/slpc: Add methods to set min/max frequency

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 10:42 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Add param set h2g helpers to set the min and max frequencies
for use by SLPC.

v2: Address review comments (Michal W)

Signed-off-by: Sundaresan Sujaritha 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 84 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 48db2a8f67d1..b40c39ba4049 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -109,6 +109,18 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
return data->header.global_state;
  }
  
+static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)

+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+   id,
+   value,
+   };
+
+   return intel_guc_send(guc, request, ARRAY_SIZE(request));


beware of possible non-zero data0 returned by guc_send()


Ok, added -EPROTO check.



+}
+
  static bool slpc_is_running(struct intel_guc_slpc *slpc)
  {
return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
@@ -143,6 +155,15 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
return ret;
  }
  
+static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)

+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   return guc_action_slpc_set_param(guc, id, value);
+}
+
  static const char *slpc_state_string(struct intel_guc_slpc *slpc)
  {
const char *str = NULL;
@@ -238,6 +259,69 @@ u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc)
GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
  }
  
+/**

+ * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the max frequency
+ * limit for unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;


nit: move "ret" as last


ok.




+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(>drm,
+   "Set max frequency unslice returned (%pe)\n", 
ERR_PTR(ret));
+   /* Return standardized err code for sysfs */
+   ret = -EIO;


maybe caller (hook in sysfs) can sanitize this error ?


Caller will then need to check the error type - something like-

if (err) {
if (err != -EINVAL)
return -EIO;
}

Seems cleaner to return specific error type from here instead.
Anything other than -EINVAL or -EIO causes garbage in sysfs output.

Thanks,
Vinay.



Michal


+   }
+   }
+
+   return ret;
+}
+
+/**
+ * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: frequency (MHz)
+ *
+ * This function will invoke GuC SLPC action to update the min unslice
+ * frequency.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm(>runtime_pm, wakeref) {
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(>drm,
+   "Set min frequency for unslice returned 
(%pe)\n", ERR_PTR(ret));
+   /* Return standardized err code for sysfs */
+   ret = -EIO;
+   }
+   }
+
+   return ret;
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index f02249ff5f1b..3a1a7eaafc12 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -30,5 +30,7 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc);
  int intel_guc_slpc_init(struct intel_guc_slpc *slpc);
  int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
  void 

Re: [Intel-gfx] [PATCH 05/14] drm/i915/guc/slpc: Allocate, initialize and release SLPC

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 10:26 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Allocate data structures for SLPC and add functions for
initializing them on the host side.

v2: Address review comments (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c| 11 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 36 ++-
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |  3 ++
  4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index fcccb103a21a..686cb978662d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -337,6 +337,12 @@ int intel_guc_init(struct intel_guc *guc)
goto err_ct;
}
  
+	if (intel_guc_slpc_is_used(guc)) {

+   ret = intel_guc_slpc_init(>slpc);
+   if (ret)
+   goto err_submission;
+   }
+
/* now that everything is perma-pinned, initialize the parameters */
guc_init_params(guc);
  
@@ -347,6 +353,8 @@ int intel_guc_init(struct intel_guc *guc)
  
  	return 0;
  
+err_submission:

+   intel_guc_submission_fini(guc);
  err_ct:
intel_guc_ct_fini(>ct);
  err_ads:
@@ -369,6 +377,9 @@ void intel_guc_fini(struct intel_guc *guc)
  
  	i915_ggtt_disable_guc(gt->ggtt);
  
+	if (intel_guc_slpc_is_used(guc))

+   intel_guc_slpc_fini(>slpc);
+
if (intel_guc_submission_is_used(guc))
intel_guc_submission_fini(guc);
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index d9feb430ce35..a99d727b5bf0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -12,6 +12,16 @@ static inline struct intel_guc *slpc_to_guc(struct 
intel_guc_slpc *slpc)
return container_of(slpc, struct intel_guc, slpc);
  }
  
+static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc)

+{
+   return guc_to_gt(slpc_to_guc(slpc));
+}
+
+static inline struct drm_i915_private *slpc_to_i915(struct intel_guc_slpc 
*slpc)
+{
+   return (slpc_to_gt(slpc))->i915;


redundant ( )


ok.




+}
+
  static bool __detect_slpc_supported(struct intel_guc *guc)
  {
/* GuC SLPC is unavailable for pre-Gen12 */
@@ -35,9 +45,29 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc)
guc->slpc_selected = __guc_slpc_selected(guc);
  }
  
+static int slpc_shared_data_init(struct intel_guc_slpc *slpc)

+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+   int err;
+
+   err = intel_guc_allocate_and_map_vma(guc, size, >vma, (void 
**)>vaddr);
+   if (unlikely(err)) {
+   drm_err(>drm,
+   "Failed to allocate SLPC struct (err=%pe)\n",
+   ERR_PTR(err));
+   return err;
+   }
+
+   return err;
+}
+
  int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
  {
-   return 0;
+   GEM_BUG_ON(slpc->vma);
+
+   return slpc_shared_data_init(slpc);
  }
  
  /*

@@ -60,4 +90,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc)

  {
+   if (!slpc->vma)
+   return;
+
+   i915_vma_unpin_and_release(>vma, I915_VMA_RELEASE_MAP);
  }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index c3b0ad7f0f93..f02249ff5f1b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -8,6 +8,7 @@
  
  #include "intel_guc_submission.h"

  #include "intel_guc_slpc_types.h"
+#include "abi/guc_actions_slpc_abi.h"


is this is needed here ?
maybe abi.h could be included only in slcp.c ?


ok, removed.



  
  static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc)

  {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index b85148265b1f..214a449e78f2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -7,9 +7,12 @@
  #define _INTEL_GUC_SLPC_TYPES_H_
  
  #include 

+#include "abi/guc_actions_slpc_abi.h"


for below pointers you don't need this header(s) to be included


ok.
Thanks,
Vinay.


Michal

  
  struct intel_guc_slpc {
  
+	struct i915_vma *vma;

+   struct slpc_shared_data *vaddr;
  };
  
  #endif




Re: [PATCH 06/14] drm/i915/guc/slpc: Enable SLPC and add related H2G events

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 10:38 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Add methods for interacting with GuC for enabling SLPC. Enable
SLPC after GuC submission has been established. GuC load will
fail if SLPC cannot be successfully initialized. Add various
helper methods to set/unset the parameters for SLPC. They can
be set using H2G calls or by directly setting bits in the shared
data structure.

This patch also removes the GEM_BUG_ON from guc_submission_disable().
The assumption when that was added was there would be no wakerefs
when it would be called. However, if we fail to enable slpc, we will


s/slpc/SLPC


done.



still be holding a wakeref.

v2: Address several review comments, add new helpers for
decoding the slpc min/max frequencies. Use masks instead of hardcoded
constants. (Michal W)

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 206 ++
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h |   2 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 -
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  10 +
  4 files changed, 218 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index a99d727b5bf0..48db2a8f67d1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc)
guc->slpc_selected = __guc_slpc_selected(guc);
  }
  
+static void slpc_mem_set_param(struct slpc_shared_data *data,

+   u32 id, u32 value)
+{
+   GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS);
+   /*
+* When the flag bit is set, corresponding value will be read
+* and applied by slpc.
+*/
+   data->override_params.bits[id >> 5] |= (1 << (id % 32));
+   data->override_params.values[id] = value;
+}
+
+static void slpc_mem_set_enabled(struct slpc_shared_data *data,
+   u8 enable_id, u8 disable_id)
+{
+   /*
+* Enabling a param involves setting the enable_id
+* to 1 and disable_id to 0.
+*/
+   slpc_mem_set_param(data, enable_id, 1);
+   slpc_mem_set_param(data, disable_id, 0);
+}
+
+static void slpc_mem_set_disabled(struct slpc_shared_data *data,
+   u8 enable_id, u8 disable_id)
+{
+   /*
+* Disabling a param involves setting the enable_id
+* to 0 and disable_id to 1.
+*/
+   slpc_mem_set_param(data, disable_id, 1);
+   slpc_mem_set_param(data, enable_id, 0);
+}
+
  static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
  {
struct intel_guc *guc = slpc_to_guc(slpc);
@@ -63,6 +97,116 @@ static int slpc_shared_data_init(struct intel_guc_slpc 
*slpc)
return err;
  }
  
+static u32 slpc_get_state(struct intel_guc_slpc *slpc)

+{
+   struct slpc_shared_data *data;
+
+   GEM_BUG_ON(!slpc->vma);
+
+   drm_clflush_virt_range(slpc->vaddr, sizeof(u32));
+   data = slpc->vaddr;
+
+   return data->header.global_state;
+}
+
+static bool slpc_is_running(struct intel_guc_slpc *slpc)
+{
+   return (slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING);
+}
+
+static int guc_action_slpc_query(struct intel_guc *guc, u32 offset)
+{
+   u32 request[] = {
+   INTEL_GUC_ACTION_SLPC_REQUEST,
+   SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+   offset,
+   0,
+   };
+
+   return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
+static int slpc_query_task_state(struct intel_guc_slpc *slpc)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   u32 shared_data_gtt_offset = intel_guc_ggtt_offset(guc, slpc->vma);
+   int ret;
+
+   ret = guc_action_slpc_query(guc, shared_data_gtt_offset);
+   if (ret)
+   drm_err(>drm, "Query task state data returned (%pe)\n",
+   ERR_PTR(ret));
+
+   drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
+
+   return ret;
+}
+
+static const char *slpc_state_string(struct intel_guc_slpc *slpc)
+{
+   const char *str = NULL;
+   u32 state = slpc_get_state(slpc);
+
+   switch (state) {
+   case SLPC_GLOBAL_STATE_NOT_RUNNING:
+   str = "not running";
+   break;
+   case SLPC_GLOBAL_STATE_INITIALIZING:
+   str = "initializing";
+   break;
+   case SLPC_GLOBAL_STATE_RESETTING:
+   str = "resetting";
+   break;
+   case SLPC_GLOBAL_STATE_RUNNING:
+   str = "running";
+   break;
+   case SLPC_GLOBAL_STATE_SHUTTING_DOWN:
+   str = "shutting down";
+   break;
+   case SLPC_GLOBAL_STATE_ERROR:
+   str = 

Re: [PATCH 04/14] drm/i915/guc/slpc: Adding SLPC communication interfaces

2021-07-23 Thread Belgaumkar, Vinay




On 7/21/2021 10:25 AM, Michal Wajdeczko wrote:



On 21.07.2021 18:11, Vinay Belgaumkar wrote:

Add constants and params that are needed to configure SLPC.

v2: Add a new abi header for SLPC. Replace bitfields with
genmasks. Address other comments from Michal W.

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 201 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|   4 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
  3 files changed, 206 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
new file mode 100644
index ..05d809746b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include 
+
+/**
+ * SLPC SHARED DATA STRUCTURE


if you want to use kernel-doc, then add DOC: tag

ok.



+ *
+ *  
+---+---+--+
+ *  | CL| Bytes | Description  
|
+ *  
+===+===+==+
+ *  | 1 | 0-3   | SHARED DATA SIZE 
|
+ *  |   
+---+--+
+ *  |   | 4-7   | GLOBAL STATE 
|
+ *  |   
+---+--+
+ *  |   | 8-11  | DISPLAY DATA ADDRESS 
|
+ *  |   
+---+--+
+ *  |   | 12:63 | PADDING  
|
+ *  
+---+---+--+
+ *  |   | 0:63  | PADDING(PLATFORM INFO)   
|
+ *  
+---+---+--+
+ *  | 3 | 0-3   | TASK STATE DATA  
|
+ *  +   
+---+--+
+ *  |   | 4:63  | PADDING  
|
+ *  
+---+---+--+
+ *  |4-21 0:1087| OVERRIDE PARAMS AND BIT FIELDS   
|

^
something didn't work here


Fixed.




+ *  
+---+---+--+
+ *  |   |   | PADDING + EXTRA RESERVED PAGE
|
+ *  
+---+---+--+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS   256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+   (SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES   4096
+#define SLPC_CACHELINE_SIZE_BYTES  64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER  SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO   
SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE  SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE  SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES)
+#define SLPC_EVENT(id, argc)   ((u32)(id) << 8 | (argc))
+#define SLPC_EVENT_MAX_INPUT_ARGS  9


above two are likely used in H2G SLPC message that is not fully defined,
maybe at the end of this file add separate section with H2G format,
using plain C #defines, and move SLPC_EVENT helper macro to fwif.h


ok.




+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES   
(SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+   + 
((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+   + (SLPC_CACHELINE_SIZE_BYTES-1)) / 
SLPC_CACHELINE_SIZE_BYTES)*SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER   

  1   2   >