Re: [Intel-gfx] [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-10 Thread John Harrison

On 10/10/2023 15:30, Cavitt, Jonathan wrote:

-Original Message-
From: Harrison, John C 
Sent: Tuesday, October 10, 2023 2:51 PM
To: Cavitt, Jonathan ; 
intel-gfx@lists.freedesktop.org
Cc: Gupta, saurabhg ; chris.p.wil...@linux.intel.com; Iddamsetty, Aravind 
; Yang, Fei ; Shyti, Andi ; 
Das, Nirmoy ; Krzysztofik, Janusz ; Roper, Matthew D 
; tvrtko.ursu...@linux.intel.com; jani.nik...@linux.intel.com
Subject: Re: [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB 
invalidation routines

On 10/10/2023 08:02, Jonathan Cavitt wrote:

...

+static void fini_tlb_lookup(struct intel_guc *guc)
+{
+   struct intel_guc_tlb_wait *wait;
+
+   if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+   return;
+
+   wait = xa_load(>tlb_lookup, guc->serial_slot);
+   kfree(wait);

There was originally a error being printed if wait->busy was still set,
i.e. someone was still waiting on the object that is about to be
destroyed. There were review comments about that being broken in an
intermediate patch set. I don't recall seeing any explanation as to why
the error message should be completely removed.


The GEM_BUG_ON was downgraded to a debug message in an intermediate step
at the request of one of the reviewers (this was a version 8 change, IIRC).
We concluded that if the execution of the system was not impacted by the debug
path, we shouldn't bother with the debug message at all.  So we removed it.
I think it was Fei or Andi that suggested it?
-Jonathan Cavitt
I recall it was me that said it should be an error message rather than a 
BUG_ON. And my point is that I don't see how this is a 'debug path'. If 
a waiter is still waiting on the wait object that is about to be freed 
then that is a potential dangling pointer dereference. That totally has 
the possibility to impact execution of the system.


John.



Re: [Intel-gfx] [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-10 Thread Cavitt, Jonathan
-Original Message-
From: Harrison, John C  
Sent: Tuesday, October 10, 2023 2:51 PM
To: Cavitt, Jonathan ; 
intel-gfx@lists.freedesktop.org
Cc: Gupta, saurabhg ; chris.p.wil...@linux.intel.com; 
Iddamsetty, Aravind ; Yang, Fei 
; Shyti, Andi ; Das, Nirmoy 
; Krzysztofik, Janusz ; 
Roper, Matthew D ; tvrtko.ursu...@linux.intel.com; 
jani.nik...@linux.intel.com
Subject: Re: [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB 
invalidation routines
> 
> On 10/10/2023 08:02, Jonathan Cavitt wrote:
> > From: Prathap Kumar Valsan 
> >
> > The GuC firmware had defined the interface for Translation Look-Aside
> > Buffer (TLB) invalidation.  We should use this interface when
> > invalidating the engine and GuC TLBs.
> > Add additional functionality to intel_gt_invalidate_tlb, invalidating
> > the GuC TLBs and falling back to GT invalidation when the GuC is
> > disabled.
> > The invalidation is done by sending a request directly to the GuC
> > tlb_lookup that invalidates the table.  The invalidation is submitted as
> > a wait request and is performed in the CT event handler.  This means we
> > cannot perform this TLB invalidation path if the CT is not enabled.
> > If the request isn't fulfilled in two seconds, this would constitute
> > an error in the invalidation as that would constitute either a lost
> > request or a severe GuC overload.
> >
> > With this new invalidation routine, we can perform GuC-based GGTT
> > invalidations.  GuC-based GGTT invalidation is incompatible with
> > MMIO invalidation so we should not perform MMIO invalidation when
> > GuC-based GGTT invalidation is expected.
> >
> > The additional complexity incurred in this patch will be necessary for
> > range-based tlb invalidations, which will be platformed in the future.
> >
> > Signed-off-by: Prathap Kumar Valsan 
> > Signed-off-by: Bruce Chang 
> > Signed-off-by: Chris Wilson 
> > Signed-off-by: Umesh Nerlige Ramappa 
> > Signed-off-by: Jonathan Cavitt 
> > Signed-off-by: Aravind Iddamsetty 
> > Signed-off-by: Fei Yang 
> > CC: Andi Shyti 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_ggtt.c  |  34 +++-
> >   drivers/gpu/drm/i915/gt/intel_tlb.c   |  16 +-
> >   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  33 
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|  22 +++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 +-
> >   7 files changed, 284 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
> > b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index 4d7d88b92632b..a1f7bdc602996 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -206,22 +206,38 @@ static void gen8_ggtt_invalidate(struct i915_ggtt 
> > *ggtt)
> > intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> >   }
> >   
> > +static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
> > +{
> > +   struct intel_uncore *uncore = gt->uncore;
> > +   intel_wakeref_t wakeref;
> > +
> > +   with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
> > +   struct intel_guc *guc = >uc.guc;
> > +
> > +   intel_guc_invalidate_tlb_guc(guc);
> > +   }
> > +}
> > +
> >   static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
> >   {
> > struct drm_i915_private *i915 = ggtt->vm.i915;
> > +   struct intel_gt *gt;
> >   
> > -   gen8_ggtt_invalidate(ggtt);
> > -
> > -   if (GRAPHICS_VER(i915) >= 12) {
> > -   struct intel_gt *gt;
> > +   if (!HAS_GUC_TLB_INVALIDATION(i915))
> > +   gen8_ggtt_invalidate(ggtt);
> >   
> > -   list_for_each_entry(gt, >gt_list, ggtt_link)
> > +   list_for_each_entry(gt, >gt_list, ggtt_link) {
> > +   if (HAS_GUC_TLB_INVALIDATION(i915) &&
> > +   intel_guc_is_ready(>uc.guc)) {
> > +   guc_ggtt_ct_invalidate(gt);
> > +   } else if (GRAPHICS_VER(i915) >= 12) {
> > intel_uncore_write_fw(gt->uncore,
> >   GEN12_GUC_TLB_INV_CR,
> >   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> > -   } else {
> > -   intel_uncore_write_fw(ggtt->vm.gt->uncore,
> > - GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> > +   } else {
> > +   intel_uncore_write_fw(gt->uncore,
> > + GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> > +   }
> > }
> >   }
> >   
> > @@ -1243,7 +1259,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> > ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
> > }
> >   
> > -   if (intel_uc_wants_guc(>vm.gt->uc))
> > +   if (intel_uc_wants_guc_submission(>vm.gt->uc))
> > ggtt->invalidate = guc_ggtt_invalidate;
> > else
> > ggtt->invalidate = gen8_ggtt_invalidate;
> > diff 

Re: [Intel-gfx] [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-10 Thread John Harrison

On 10/10/2023 08:02, Jonathan Cavitt wrote:

From: Prathap Kumar Valsan 

The GuC firmware had defined the interface for Translation Look-Aside
Buffer (TLB) invalidation.  We should use this interface when
invalidating the engine and GuC TLBs.
Add additional functionality to intel_gt_invalidate_tlb, invalidating
the GuC TLBs and falling back to GT invalidation when the GuC is
disabled.
The invalidation is done by sending a request directly to the GuC
tlb_lookup that invalidates the table.  The invalidation is submitted as
a wait request and is performed in the CT event handler.  This means we
cannot perform this TLB invalidation path if the CT is not enabled.
If the request isn't fulfilled in two seconds, this would constitute
an error in the invalidation as that would constitute either a lost
request or a severe GuC overload.

With this new invalidation routine, we can perform GuC-based GGTT
invalidations.  GuC-based GGTT invalidation is incompatible with
MMIO invalidation so we should not perform MMIO invalidation when
GuC-based GGTT invalidation is expected.

The additional complexity incurred in this patch will be necessary for
range-based tlb invalidations, which will be platformed in the future.

Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Bruce Chang 
Signed-off-by: Chris Wilson 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Jonathan Cavitt 
Signed-off-by: Aravind Iddamsetty 
Signed-off-by: Fei Yang 
CC: Andi Shyti 
---
  drivers/gpu/drm/i915/gt/intel_ggtt.c  |  34 +++-
  drivers/gpu/drm/i915/gt/intel_tlb.c   |  16 +-
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  33 
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  22 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 +-
  7 files changed, 284 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4d7d88b92632b..a1f7bdc602996 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -206,22 +206,38 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  }
  
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)

+{
+   struct intel_uncore *uncore = gt->uncore;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
+   struct intel_guc *guc = >uc.guc;
+
+   intel_guc_invalidate_tlb_guc(guc);
+   }
+}
+
  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
  {
struct drm_i915_private *i915 = ggtt->vm.i915;
+   struct intel_gt *gt;
  
-	gen8_ggtt_invalidate(ggtt);

-
-   if (GRAPHICS_VER(i915) >= 12) {
-   struct intel_gt *gt;
+   if (!HAS_GUC_TLB_INVALIDATION(i915))
+   gen8_ggtt_invalidate(ggtt);
  
-		list_for_each_entry(gt, >gt_list, ggtt_link)

+   list_for_each_entry(gt, >gt_list, ggtt_link) {
+   if (HAS_GUC_TLB_INVALIDATION(i915) &&
+   intel_guc_is_ready(>uc.guc)) {
+   guc_ggtt_ct_invalidate(gt);
+   } else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_write_fw(gt->uncore,
  GEN12_GUC_TLB_INV_CR,
  GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   } else {
-   intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
}
  }
  
@@ -1243,7 +1259,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)

ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
}
  
-	if (intel_uc_wants_guc(>vm.gt->uc))

+   if (intel_uc_wants_guc_submission(>vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c 
b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d978..4bb13d1890e37 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@
  #include "intel_gt_print.h"
  #include "intel_gt_regs.h"
  #include "intel_tlb.h"
+#include "uc/intel_guc.h"
  
  /*

   * HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, 
u32 seqno)
return;
  
  	with_intel_gt_pm_if_awake(gt, wakeref) {

+   struct intel_guc *guc = >uc.guc;
+
mutex_lock(>tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
goto 

[Intel-gfx] [PATCH v10 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-10 Thread Jonathan Cavitt
From: Prathap Kumar Valsan 

The GuC firmware had defined the interface for Translation Look-Aside
Buffer (TLB) invalidation.  We should use this interface when
invalidating the engine and GuC TLBs.
Add additional functionality to intel_gt_invalidate_tlb, invalidating
the GuC TLBs and falling back to GT invalidation when the GuC is
disabled.
The invalidation is done by sending a request directly to the GuC
tlb_lookup that invalidates the table.  The invalidation is submitted as
a wait request and is performed in the CT event handler.  This means we
cannot perform this TLB invalidation path if the CT is not enabled.
If the request isn't fulfilled in two seconds, this would constitute
an error in the invalidation as that would constitute either a lost
request or a severe GuC overload.

With this new invalidation routine, we can perform GuC-based GGTT
invalidations.  GuC-based GGTT invalidation is incompatible with
MMIO invalidation so we should not perform MMIO invalidation when
GuC-based GGTT invalidation is expected.

The additional complexity incurred in this patch will be necessary for
range-based tlb invalidations, which will be platformed in the future.

Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Bruce Chang 
Signed-off-by: Chris Wilson 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Jonathan Cavitt 
Signed-off-by: Aravind Iddamsetty 
Signed-off-by: Fei Yang 
CC: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  34 +++-
 drivers/gpu/drm/i915/gt/intel_tlb.c   |  16 +-
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  33 
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  22 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 +-
 7 files changed, 284 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4d7d88b92632b..a1f7bdc602996 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -206,22 +206,38 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 }
 
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
+{
+   struct intel_uncore *uncore = gt->uncore;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
+   struct intel_guc *guc = >uc.guc;
+
+   intel_guc_invalidate_tlb_guc(guc);
+   }
+}
+
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
struct drm_i915_private *i915 = ggtt->vm.i915;
+   struct intel_gt *gt;
 
-   gen8_ggtt_invalidate(ggtt);
-
-   if (GRAPHICS_VER(i915) >= 12) {
-   struct intel_gt *gt;
+   if (!HAS_GUC_TLB_INVALIDATION(i915))
+   gen8_ggtt_invalidate(ggtt);
 
-   list_for_each_entry(gt, >gt_list, ggtt_link)
+   list_for_each_entry(gt, >gt_list, ggtt_link) {
+   if (HAS_GUC_TLB_INVALIDATION(i915) &&
+   intel_guc_is_ready(>uc.guc)) {
+   guc_ggtt_ct_invalidate(gt);
+   } else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_write_fw(gt->uncore,
  GEN12_GUC_TLB_INV_CR,
  GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   } else {
-   intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
}
 }
 
@@ -1243,7 +1259,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
}
 
-   if (intel_uc_wants_guc(>vm.gt->uc))
+   if (intel_uc_wants_guc_submission(>vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c 
b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d978..4bb13d1890e37 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
 #include "intel_tlb.h"
+#include "uc/intel_guc.h"
 
 /*
  * HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, 
u32 seqno)
return;
 
with_intel_gt_pm_if_awake(gt, wakeref) {
+   struct intel_guc *guc = >uc.guc;
+
mutex_lock(>tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
goto unlock;
 
-