Re: [PATCH 22/47] drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC

2021-07-16 Thread Matthew Brost
On Sat, Jul 10, 2021 at 03:55:02AM +, Matthew Brost wrote:
> On Fri, Jul 09, 2021 at 05:16:34PM -0700, John Harrison wrote:
> > On 6/24/2021 00:04, Matthew Brost wrote:
> > > When running the GuC the GPU can't be considered idle if the GuC still
> > > has contexts pinned. As such, a call has been added in
> > > intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for
> > > the number of unpinned contexts to go to zero.
> > > 
> > > v2: rtimeout -> remaining_timeout
> > > 
> > > Cc: John Harrison 
> > > Signed-off-by: Matthew Brost 
> > > ---
> > >   drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
> > >   drivers/gpu/drm/i915/gt/intel_gt.c| 19 
> > >   drivers/gpu/drm/i915/gt/intel_gt.h|  2 +
> > >   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 22 ++---
> > >   drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  9 +-
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|  4 +
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  1 +
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  4 +
> > >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 ++-
> > >   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  5 ++
> > >   drivers/gpu/drm/i915/i915_debugfs.c   |  1 +
> > >   drivers/gpu/drm/i915/i915_gem_evict.c |  1 +
> > >   .../gpu/drm/i915/selftests/igt_live_test.c|  2 +-
> > >   .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
> > >   14 files changed, 137 insertions(+), 27 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
> > > b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > index 2fd155742bd2..335b955d5b4b 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > @@ -644,7 +644,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
> > >   goto insert;
> > >   /* Attempt to reap some mmap space from dead objects */
> > > - err = intel_gt_retire_requests_timeout(>gt, MAX_SCHEDULE_TIMEOUT);
> > > + err = intel_gt_retire_requests_timeout(>gt, MAX_SCHEDULE_TIMEOUT,
> > > +NULL);
> > >   if (err)
> > >   goto err;
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> > > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > > index e714e21c0a4d..acfdd53b2678 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > > @@ -585,6 +585,25 @@ static void __intel_gt_disable(struct intel_gt *gt)
> > >   GEM_BUG_ON(intel_gt_pm_is_awake(gt));
> > >   }
> > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
> > > +{
> > > + long remaining_timeout;
> > > +
> > > + /* If the device is asleep, we have no requests outstanding */
> > > + if (!intel_gt_pm_is_awake(gt))
> > > + return 0;
> > > +
> > > + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
> > > +_timeout)) 
> > > > 0) {
> > > + cond_resched();
> > > + if (signal_pending(current))
> > > + return -EINTR;
> > > + }
> > > +
> > > + return timeout ? timeout : intel_uc_wait_for_idle(>uc,
> > > +   remaining_timeout);
> > > +}
> > > +
> > >   int intel_gt_init(struct intel_gt *gt)
> > >   {
> > >   int err;
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
> > > b/drivers/gpu/drm/i915/gt/intel_gt.h
> > > index e7aabe0cc5bf..74e771871a9b 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h
> > > @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt);
> > >   void intel_gt_driver_late_release(struct intel_gt *gt);
> > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
> > > +
> > >   void intel_gt_check_and_clear_faults(struct intel_gt *gt);
> > >   void intel_gt_clear_error_registers(struct intel_gt *gt,
> > >   intel_engine_mask_t engine_mask);
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c 
> > > b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > > index 647eca9d867a..39f5e824dac5 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > > @@ -13,6 +13,7 @@
> > >   #include "intel_gt_pm.h"
> > >   #include "intel_gt_requests.h"
> > >   #include "intel_timeline.h"
> > > +#include "uc/intel_uc.h"
> > Why is this needed?
> > 
> 
> It is not, likely holdover from internal churn.
> 
> > >   static bool retire_requests(struct intel_timeline *tl)
> > >   {
> > > @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs 
> > > *engine)
> > >   GEM_BUG_ON(engine->retire);
> > >   }
> > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
> > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout,
> > > +  

Re: [PATCH 46/51] drm/i915/selftest: Fix MOCS selftest for GuC submission

2021-07-16 Thread Matthew Brost
On Fri, Jul 16, 2021 at 01:17:19PM -0700, Matthew Brost wrote:
> From: Rahul Kumar Singh 
> 
> When GuC submission is enabled, the GuC controls engine resets. Rather
> than explicitly triggering a reset, the driver must submit a hanging
> context to GuC and wait for the reset to occur.
> 
> Signed-off-by: Rahul Kumar Singh 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 
> Cc: Matthew Brost 

Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 ++---
>  1 file changed, 35 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c 
> b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index 8763bbeca0f7..b7314739ee40 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -10,6 +10,7 @@
>  #include "gem/selftests/mock_context.h"
>  #include "selftests/igt_reset.h"
>  #include "selftests/igt_spinner.h"
> +#include "selftests/intel_scheduler_helpers.h"
>  
>  struct live_mocs {
>   struct drm_i915_mocs_table table;
> @@ -318,7 +319,8 @@ static int live_mocs_clean(void *arg)
>  }
>  
>  static int active_engine_reset(struct intel_context *ce,
> -const char *reason)
> +const char *reason,
> +bool using_guc)
>  {
>   struct igt_spinner spin;
>   struct i915_request *rq;
> @@ -335,9 +337,13 @@ static int active_engine_reset(struct intel_context *ce,
>   }
>  
>   err = request_add_spin(rq, );
> - if (err == 0)
> + if (err == 0 && !using_guc)
>   err = intel_engine_reset(ce->engine, reason);
>  
> + /* Ensure the reset happens and kills the engine */
> + if (err == 0)
> + err = intel_selftest_wait_for_rq(rq);
> +
>   igt_spinner_end();
>   igt_spinner_fini();
>  
> @@ -345,21 +351,23 @@ static int active_engine_reset(struct intel_context *ce,
>  }
>  
>  static int __live_mocs_reset(struct live_mocs *mocs,
> -  struct intel_context *ce)
> +  struct intel_context *ce, bool using_guc)
>  {
>   struct intel_gt *gt = ce->engine->gt;
>   int err;
>  
>   if (intel_has_reset_engine(gt)) {
> - err = intel_engine_reset(ce->engine, "mocs");
> - if (err)
> - return err;
> -
> - err = check_mocs_engine(mocs, ce);
> - if (err)
> - return err;
> + if (!using_guc) {
> + err = intel_engine_reset(ce->engine, "mocs");
> + if (err)
> + return err;
> +
> + err = check_mocs_engine(mocs, ce);
> + if (err)
> + return err;
> + }
>  
> - err = active_engine_reset(ce, "mocs");
> + err = active_engine_reset(ce, "mocs", using_guc);
>   if (err)
>   return err;
>  
> @@ -395,19 +403,32 @@ static int live_mocs_reset(void *arg)
>  
>   igt_global_reset_lock(gt);
>   for_each_engine(engine, gt, id) {
> + bool using_guc = intel_engine_uses_guc(engine);
> + struct intel_selftest_saved_policy saved;
>   struct intel_context *ce;
> + int err2;
> +
> + err = intel_selftest_modify_policy(engine, );
> + if (err)
> + break;
>  
>   ce = mocs_context_create(engine);
>   if (IS_ERR(ce)) {
>   err = PTR_ERR(ce);
> - break;
> + goto restore;
>   }
>  
>   intel_engine_pm_get(engine);
> - err = __live_mocs_reset(, ce);
> - intel_engine_pm_put(engine);
>  
> + err = __live_mocs_reset(, ce, using_guc);
> +
> + intel_engine_pm_put(engine);
>   intel_context_put(ce);
> +
> +restore:
> + err2 = intel_selftest_restore_policy(engine, );
> + if (err == 0)
> + err = err2;
>   if (err)
>   break;
>   }
> -- 
> 2.28.0
> 


Re: [PATCH 48/51] drm/i915/selftest: Fix hangcheck self test for GuC submission

2021-07-16 Thread Matthew Brost
On Fri, Jul 16, 2021 at 01:17:21PM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> When GuC submission is enabled, the GuC controls engine resets. Rather
> than explicitly triggering a reset, the driver must submit a hanging
> context to GuC and wait for the reset to occur.
> 
> Conversely, one of the tests specifically sends hanging batches to the
> engines but wants them to sit around until a manual reset of the full
> GT (including GuC itself). That means disabling GuC based engine
> resets to prevent those from killing the hanging batch too soon. So,
> add support to the scheduling policy helper for disabling resets as
> well as making them quicker!
> 
> In GuC submission mode, the 'is engine idle' test basically turns into
> 'is engine PM wakelock held'. Independently, there is a heartbeat
> disable helper function that the tests use. For unexplained reasons,
> this acquires the engine wakelock before disabling the heartbeat and
> only releases it when re-enabling the heartbeat. As one of the tests
> tries to do a wait for idle in the middle of a heartbeat disabled
> section, it is therefore guaranteed to always fail. Added a 'no_pm'
> variant of the heartbeat helper that allows the engine to be asleep
> while also having heartbeats disabled.
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 

Reviewed-by: Matthew Brost 

> Cc: Daniele Ceraolo Spurio 
> Cc: Matthew Brost 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
>  .../drm/i915/gt/selftest_engine_heartbeat.c   |  22 ++
>  .../drm/i915/gt/selftest_engine_heartbeat.h   |   2 +
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 223 +-
>  drivers/gpu/drm/i915/gt/selftest_mocs.c   |   3 +-
>  .../gpu/drm/i915/gt/selftest_workarounds.c|   6 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   3 +
>  .../i915/selftests/intel_scheduler_helpers.c  |  39 ++-
>  .../i915/selftests/intel_scheduler_helpers.h  |   9 +-
>  9 files changed, 237 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d66b732a91c2..eec57e57403f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -449,6 +449,7 @@ struct intel_engine_cs {
>  #define I915_ENGINE_IS_VIRTUAL   BIT(5)
>  #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
>  #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
> +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
>   unsigned int flags;
>  
>   /*
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c 
> b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> index 4896e4ccad50..317eebf086c3 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> @@ -405,3 +405,25 @@ void st_engine_heartbeat_enable(struct intel_engine_cs 
> *engine)
>   engine->props.heartbeat_interval_ms =
>   engine->defaults.heartbeat_interval_ms;
>  }
> +
> +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
> +{
> + engine->props.heartbeat_interval_ms = 0;
> +
> + /*
> +  * Park the heartbeat but without holding the PM lock as that
> +  * makes the engines appear not-idle. Note that if/when unpark
> +  * is called due to the PM lock being acquired later the
> +  * heartbeat still won't be enabled because of the above = 0.
> +  */
> + if (intel_engine_pm_get_if_awake(engine)) {
> + intel_engine_park_heartbeat(engine);
> + intel_engine_pm_put(engine);
> + }
> +}
> +
> +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
> +{
> + engine->props.heartbeat_interval_ms =
> + engine->defaults.heartbeat_interval_ms;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h 
> b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
> index cd27113d5400..81da2cd8e406 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
> @@ -9,6 +9,8 @@
>  struct intel_engine_cs;
>  
>  void st_engine_heartbeat_disable(struct intel_engine_cs *engine);
> +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine);
>  void st_engine_heartbeat_enable(struct intel_engine_cs *engine);
> +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine);
>  
>  #endif /* SELFTEST_ENGINE_HEARTBEAT_H */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
> b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 0ed87cc4d063..971c0c249eb0 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -17,6 +17,8 @@
>  #include "selftests/igt_flush_test.h"
>  #include "selftests/igt_reset.h"
>  #include "selftests/igt_atomic.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/intel_scheduler_helpers.h"
>  
>  

Re: [PATCH] drm/tegra: Fix compilation of variadic macro

2021-07-16 Thread Thierry Reding
On Fri, Jul 16, 2021 at 01:46:00PM +0100, Jon Hunter wrote:
> Commit 43636451db8c ("drm/tegra: Implement job submission part of new
> UAPI") added the macro 'SUBMIT_ERR' that in turns makes use of the macro
> '__VA_OPT__'. The '__VA_OPT__' macro is not supported by older versions
> of GCC and so causes build failures when using older versions of GCC.
> Fix this by using the '##__VA_ARGS__' macro instead.
> 
> Fixes: 43636451db8c ("drm/tegra: Implement job submission part of new UAPI")
> Reported-by: Linux Kernel Functional Testing 
> Signed-off-by: Jon Hunter 
> ---
>  drivers/gpu/drm/tegra/submit.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied, thanks.

Thierry


signature.asc
Description: PGP signature


[PATCH 2/2] drm/i915: Tear down properly on early i915_init exit

2021-07-16 Thread Jason Ekstrand
In i915_exit(), we check i915_pci_driver.driver.owner to detect if
i915_init exited early and don't tear anything down.  However, we didn't
have proper tear-down paths for early exits in i915_init().

Most of the time, you would never notice this as driver init failures
are extremely rare and generally the sign of a bigger bug.  However,
when the mock self-tests are run, they run as part of i915_init() and
exit early once they complete.  They run after i915_globals_init() and
before we set up anything else.  The IGT test then unloads the module,
invoking i915_exit() which, thanks to our i915_pci_driver.driver.owner
check, doesn't actually tear anything down.  Importantly, this means
i915_globals_exit() never gets called even though i915_globals_init()
was and we leak the globals.

The most annoying part is that you don't actually notice the failure as
part of the self-tests since leaking a bit of memory, while bad, doesn't
result in anything observable from userspace.  Instead, the next time we
load the driver (usually for next IGT test), i915_globals_init() gets
invoked again, we go to allocate a bunch of new memory slabs, those
implicitly create debugfs entries, and debugfs warns that we're trying
to create directories and files that already exist.  Since this all
happens as part of the next driver load, it shows up in the dmesg-warn
of whatever IGT test ran after the mock selftests.

Signed-off-by: Jason Ekstrand 
Fixes: 32eb6bcfdda9 ("drm/i915: Make request allocation caches global")
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_globals.c |  4 ++--
 drivers/gpu/drm/i915/i915_pci.c | 23 +--
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_globals.c 
b/drivers/gpu/drm/i915/i915_globals.c
index 77f1911c463b8..87267e1d2ad92 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -138,7 +138,7 @@ void i915_globals_unpark(void)
atomic_inc();
 }
 
-static void __exit __i915_globals_flush(void)
+static void __i915_globals_flush(void)
 {
atomic_inc(); /* skip shrinking */
 
@@ -148,7 +148,7 @@ static void __exit __i915_globals_flush(void)
atomic_dec();
 }
 
-void __exit i915_globals_exit(void)
+void i915_globals_exit(void)
 {
GEM_BUG_ON(atomic_read());
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 50ed93b03e582..783f547be0990 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1199,13 +1199,20 @@ static int __init i915_init(void)
bool use_kms = true;
int err;
 
+   /* We use this to detect early returns from i915_init() so we don't
+* tear anything down in i915_exit()
+*/
+   i915_pci_driver.driver.owner = NULL;
+
err = i915_globals_init();
if (err)
return err;
 
err = i915_mock_selftests();
-   if (err)
-   return err > 0 ? 0 : err;
+   if (err) {
+   err = err > 0 ? 0 : err;
+   goto globals_exit;
+   }
 
/*
 * Enable KMS by default, unless explicitly overriden by
@@ -1228,13 +1235,17 @@ static int __init i915_init(void)
i915_pmu_init();
 
err = pci_register_driver(_pci_driver);
-   if (err) {
-   i915_pmu_exit();
-   return err;
-   }
+   if (err)
+   goto pmu_exit;
 
i915_perf_sysctl_register();
return 0;
+
+pmu_exit:
+   i915_pmu_exit();
+globals_exit:
+   i915_globals_exit();
+   return err;
 }
 
 static void __exit i915_exit(void)
-- 
2.31.1



[PATCH 1/2] drm/i915: Call i915_globals_exit() after i915_pmu_exit()

2021-07-16 Thread Jason Ekstrand
We should tear down in the opposite order we set up.

Signed-off-by: Jason Ekstrand 
Fixes: 537f9c84a427 ("drm/i915/pmu: Fix CPU hotplug with multiple GPUs")
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 67696d7522718..50ed93b03e582 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1244,8 +1244,8 @@ static void __exit i915_exit(void)
 
i915_perf_sysctl_unregister();
pci_unregister_driver(_pci_driver);
-   i915_globals_exit();
i915_pmu_exit();
+   i915_globals_exit();
 }
 
 module_init(i915_init);
-- 
2.31.1



Re: [PATCH 49/51] drm/i915/selftest: Bump selftest timeouts for hangcheck

2021-07-16 Thread Matthew Brost
On Fri, Jul 16, 2021 at 01:17:22PM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> Some testing environments and some heavier tests are slower than
> previous limits allowed for. For example, it can take multiple seconds
> for the 'context has been reset' notification handler to reach the
> 'kill the requests' code in the 'active' version of the 'reset
> engines' test. During which time the selftest gets bored, gives up
> waiting and fails the test.
> 
> There is also an async thread that the selftest uses to pump work
> through the hardware in parallel to the context that is marked for
> reset. That also could get bored waiting for completions and kill the
> test off.
> 
> Lastly, the flush at the end of various test sections can also see
> timeouts due to the large amount of work backed up. This is also true
> of the live_hwsp_read test.
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 

Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +-
>  drivers/gpu/drm/i915/selftests/igt_flush_test.c  | 2 +-
>  drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
> b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 971c0c249eb0..a93a9b0d258e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -876,7 +876,7 @@ static int active_request_put(struct i915_request *rq)
>   if (!rq)
>   return 0;
>  
> - if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
> + if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
>   GEM_TRACE("%s timed out waiting for completion of fence 
> %llx:%lld\n",
> rq->engine->name,
> rq->fence.context,
> diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c 
> b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> index 7b0939e3f007..a6c71fca61aa 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> @@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915)
>  
>   cond_resched();
>  
> - if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
> + if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) {
>   pr_err("%pS timed out, cancelling all further testing.\n",
>  __builtin_return_address(0));
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c 
> b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
> index 69db139f9e0d..ebd6d69b3315 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
> @@ -13,7 +13,7 @@
>  
>  #define REDUCED_TIMESLICE5
>  #define REDUCED_PREEMPT  10
> -#define WAIT_FOR_RESET_TIME  1000
> +#define WAIT_FOR_RESET_TIME  10000
>  
>  int intel_selftest_modify_policy(struct intel_engine_cs *engine,
>struct intel_selftest_saved_policy *saved,
> -- 
> 2.28.0
> 


Re: [PATCH v3 0/8] Support DEVICE_GENERIC memory in migrate_vma_*

2021-07-16 Thread Felix Kuehling
Am 2021-07-16 um 11:07 a.m. schrieb Theodore Y. Ts'o:
> On Wed, Jun 23, 2021 at 05:49:55PM -0400, Felix Kuehling wrote:
>> I can think of two ways to test the changes for MEMORY_DEVICE_GENERIC in
>> this patch series in a way that is reproducible without special hardware and
>> firmware:
>>
>> For the reference counting changes we could use the dax driver with hmem and
>> use efi_fake_mem on the kernel command line to create some DEVICE_GENERIC
>> pages. I'm open to suggestions for good user mode tests to exercise dax
>> functionality on this type of memory.
> Sorry for the thread necromancy, but now that the merge window is
> past

No worries. Alejandro should have a new version of this series in a few
days, with updates to hmm_test and some fixes.


>
> Today I test ext4's dax support, without having any $$$ DAX hardware,
> by using the kernel command line "memmap=4G!9G:memmap=9G!14G" which
> reserves memory so that creates two pmem device and then I run
> xfstests with DAX enabled using qemu or using a Google Compute Engine
> VM, using TEST_DEV=/dev/pmem0 and SCRATCH_DEV=/dev/pmem1.
>
> If you can give me a recipe for what kernel configs I should enable,
> and what magic kernel command line arguments to use, then I'd be able
> to test your patch set with ext4.
That would be great!

Regarding kernel config options, it should be the same as what you're
using for DAX testing today. We're not changing or adding any Kconfig
options. But let me take a stab:

ZONE_DEVICE
HMM_MIRROR
MMU_NOTIFIER
DEVICE_PRIVATE (maybe not needed for your test)
FS_DAX

I'm not sure what you're looking for in terms of kernel command line,
other than the memmap options you already found. There are some more
options to run hmm_test with fake SPM (DEVICE_GENERIC) memory, but we're
already running that ourselves. That will also be in the next revision
of this patch series.

If you can run your xfstests with DAX on top of this patch series, that
would be very helpful. That's to make sure the ZONE_DEVICE page refcount
changes don't break DAX.

Regards,
  Felix


>
> Cheers,
>
>   - Ted


Re: [PATCH] drm/mipi: set fwnode when a mipi_dsi_device registers itself

2021-07-16 Thread Saravana Kannan
Hi William,

Thanks for catching this.

On Fri, Jul 9, 2021 at 11:45 PM Will McVicker  wrote:
>
> This is needed for fw_devlink to work properly with MIPI DSI devices.
> Without setting the device's fwnode, the sync state framework isn't able
> to properly track device links between the MIPI DSI device and its
> suppliers which may result in its supplier probing before the mipi
> device.

I think it'd be more accurate if the commit text is something like:

drm/mipi: set fwnode when a mipi_dsi_device is registered

This allows the fw_devlink feature to work across mipi_dsi bus devices too. This
feature avoid unnecessary probe deferrals of mipi_dsi devices, defers
consumers of
mipi_dsi devices till the mipi_dsi devices probe, and allows mipi_dsi drivers to
implement sync_state() callbacks.

Reviewed-by: Saravana Kannan 

Thanks,
Saravana

>
> Suggested-by: Saravana Kannan 
> Signed-off-by: Will McVicker 
> ---
>  drivers/gpu/drm/drm_mipi_dsi.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
> index 5dd475e82995..469d56cf2a50 100644
> --- a/drivers/gpu/drm/drm_mipi_dsi.c
> +++ b/drivers/gpu/drm/drm_mipi_dsi.c
> @@ -222,6 +222,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
> }
>
> dsi->dev.of_node = info->node;
> +   dsi->dev.fwnode = of_fwnode_handle(info->node);
> dsi->channel = info->channel;
> strlcpy(dsi->name, info->type, sizeof(dsi->name));
>
> --
> 2.32.0.93.g670b81a890-goog
>


Re: [Intel-gfx] [PATCH 44/51] drm/i915/selftest: Better error reporting from hangcheck selftest

2021-07-16 Thread Matthew Brost
On Fri, Jul 16, 2021 at 01:17:17PM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> There are many ways in which the hangcheck selftest can fail. Very few
> of them actually printed an error message to say what happened. So,
> fill in the missing messages.
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 

Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 89 
>  1 file changed, 72 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
> b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 7aea10aa1fb4..0ed87cc4d063 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -378,6 +378,7 @@ static int igt_reset_nop(void *arg)
>   ce = intel_context_create(engine);
>   if (IS_ERR(ce)) {
>   err = PTR_ERR(ce);
> + pr_err("[%s] Create context failed: %d!\n", 
> engine->name, err);
>   break;
>   }
>  
> @@ -387,6 +388,7 @@ static int igt_reset_nop(void *arg)
>   rq = intel_context_create_request(ce);
>   if (IS_ERR(rq)) {
>   err = PTR_ERR(rq);
> + pr_err("[%s] Create request failed: 
> %d!\n", engine->name, err);
>   break;
>   }
>  
> @@ -401,24 +403,31 @@ static int igt_reset_nop(void *arg)
>   igt_global_reset_unlock(gt);
>  
>   if (intel_gt_is_wedged(gt)) {
> + pr_err("[%s] GT is wedged!\n", engine->name);
>   err = -EIO;
>   break;
>   }
>  
>   if (i915_reset_count(global) != reset_count + ++count) {
> - pr_err("Full GPU reset not recorded!\n");
> + pr_err("[%s] Reset not recorded: %d vs %d + %d!\n",
> +engine->name, i915_reset_count(global), 
> reset_count, count);
>   err = -EINVAL;
>   break;
>   }
>  
>   err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
>   break;
> + }
>   } while (time_before(jiffies, end_time));
>   pr_info("%s: %d resets\n", __func__, count);
>  
> - if (igt_flush_test(gt->i915))
> + if (igt_flush_test(gt->i915)) {
> + pr_err("Post flush failed: %d!\n", err);
>   err = -EIO;
> + }
> +
>   return err;
>  }
>  
> @@ -441,8 +450,10 @@ static int igt_reset_nop_engine(void *arg)
>   int err;
>  
>   ce = intel_context_create(engine);
> - if (IS_ERR(ce))
> + if (IS_ERR(ce)) {
> + pr_err("[%s] Create context failed: %d!\n", 
> engine->name, err);
>   return PTR_ERR(ce);
> + }
>  
>   reset_count = i915_reset_count(global);
>   reset_engine_count = i915_reset_engine_count(global, engine);
> @@ -550,8 +561,10 @@ static int igt_reset_fail_engine(void *arg)
>   int err;
>  
>   ce = intel_context_create(engine);
> - if (IS_ERR(ce))
> + if (IS_ERR(ce)) {
> + pr_err("[%s] Create context failed: %d!\n", 
> engine->name, err);
>   return PTR_ERR(ce);
> + }
>  
>   st_engine_heartbeat_disable(engine);
>   set_bit(I915_RESET_ENGINE + id, >reset.flags);
> @@ -711,6 +724,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
> active)
>   rq = hang_create_request(, engine);
>   if (IS_ERR(rq)) {
>   err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request 
> failed: %d!\n", engine->name, err);
>   break;
>   }
>  
> @@ -765,12 +779,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
> active)
>   break;
>  
>   err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
>   break;
> + }
>   }
>  
> - if (intel_gt_is_wedged(gt))
> + if (intel_gt_is_wedged(gt)) {
> + pr_err("GT is wedged!\n");
>   err = -EIO;
> + }
>  
>   if (active)
>   hang_fini();
> @@ -837,6 +855,7 @@ static int active_engine(void *data)
>   ce[count] = intel_context_create(engine);
>   

Re: [PATCH 39/51] drm/i915/guc: Connect reset modparam updates to GuC policy flags

2021-07-16 Thread Matthew Brost
On Fri, Jul 16, 2021 at 01:17:12PM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> Changing the reset module parameter has no effect on a running GuC.
> The corresponding entry in the ADS must be updated and then the GuC
> informed via a Host2GuC message.
> 
> The new debugfs interface to module parameters allows this to happen.
> However, connecting the parameter data address back to anything useful
> is messy. One option would be to pass a new private data structure
> address through instead of just the parameter pointer. However, that
> means having a new (and different) data structure for each parameter
> and a new (and different) write function for each parameter. This
> method keeps everything generic by instead using a string lookup on
> the directory entry name.
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 

Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  2 +-
>  drivers/gpu/drm/i915/i915_debugfs_params.c | 31 ++
>  2 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 2ad5fcd4e1b7..c6d0b762d82c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -99,7 +99,7 @@ static int guc_action_policies_update(struct intel_guc 
> *guc, u32 policy_offset)
>   policy_offset
>   };
>  
> - return intel_guc_send(guc, action, ARRAY_SIZE(action));
> + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, 
> true);
>  }
>  
>  int intel_guc_global_policies_update(struct intel_guc *guc)
> diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c 
> b/drivers/gpu/drm/i915/i915_debugfs_params.c
> index 4e2b077692cb..8ecd8b42f048 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs_params.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
> @@ -6,9 +6,20 @@
>  #include 
>  
>  #include "i915_debugfs_params.h"
> +#include "gt/intel_gt.h"
> +#include "gt/uc/intel_guc.h"
>  #include "i915_drv.h"
>  #include "i915_params.h"
>  
> +#define MATCH_DEBUGFS_NODE_NAME(_file, _name)
> (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0)
> +
> +#define GET_I915(i915, name, ptr)\
> + do {\
> + struct i915_params *params; \
> + params = container_of(((void *) (ptr)), typeof(*params), name); 
> \
> + (i915) = container_of(params, typeof(*(i915)), params); \
> + } while(0)
> +
>  /* int param */
>  static int i915_param_int_show(struct seq_file *m, void *data)
>  {
> @@ -24,6 +35,16 @@ static int i915_param_int_open(struct inode *inode, struct 
> file *file)
>   return single_open(file, i915_param_int_show, inode->i_private);
>  }
>  
> +static int notify_guc(struct drm_i915_private *i915)
> +{
> + int ret = 0;
> +
> + if (intel_uc_uses_guc_submission(>gt.uc))
> + ret = intel_guc_global_policies_update(>gt.uc.guc);
> +
> + return ret;
> +}
> +
>  static ssize_t i915_param_int_write(struct file *file,
>   const char __user *ubuf, size_t len,
>   loff_t *offp)
> @@ -81,8 +102,10 @@ static ssize_t i915_param_uint_write(struct file *file,
>const char __user *ubuf, size_t len,
>loff_t *offp)
>  {
> + struct drm_i915_private *i915;
>   struct seq_file *m = file->private_data;
>   unsigned int *value = m->private;
> + unsigned int old = *value;
>   int ret;
>  
>   ret = kstrtouint_from_user(ubuf, len, 0, value);
> @@ -95,6 +118,14 @@ static ssize_t i915_param_uint_write(struct file *file,
>   *value = b;
>   }
>  
> + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) {
> + GET_I915(i915, reset, value);
> +
> + ret = notify_guc(i915);
> + if (ret)
> + *value = old;
> + }
> +
>   return ret ?: len;
>  }
>  
> -- 
> 2.28.0
> 


[PATCH 49/51] drm/i915/selftest: Bump selftest timeouts for hangcheck

2021-07-16 Thread Matthew Brost
From: John Harrison 

Some testing environments and some heavier tests are slower than
previous limits allowed for. For example, it can take multiple seconds
for the 'context has been reset' notification handler to reach the
'kill the requests' code in the 'active' version of the 'reset
engines' test. During which time the selftest gets bored, gives up
waiting and fails the test.

There is also an async thread that the selftest uses to pump work
through the hardware in parallel to the context that is marked for
reset. That also could get bored waiting for completions and kill the
test off.

Lastly, the flush at the end of various test sections can also see
timeouts due to the large amount of work backed up. This is also true
of the live_hwsp_read test.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +-
 drivers/gpu/drm/i915/selftests/igt_flush_test.c  | 2 +-
 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 971c0c249eb0..a93a9b0d258e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -876,7 +876,7 @@ static int active_request_put(struct i915_request *rq)
if (!rq)
return 0;
 
-   if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
+   if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
GEM_TRACE("%s timed out waiting for completion of fence 
%llx:%lld\n",
  rq->engine->name,
  rq->fence.context,
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c 
b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 7b0939e3f007..a6c71fca61aa 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
cond_resched();
 
-   if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+   if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) {
pr_err("%pS timed out, cancelling all further testing.\n",
   __builtin_return_address(0));
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c 
b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
index 69db139f9e0d..ebd6d69b3315 100644
--- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
+++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
@@ -13,7 +13,7 @@
 
 #define REDUCED_TIMESLICE  5
 #define REDUCED_PREEMPT10
-#define WAIT_FOR_RESET_TIME1000
+#define WAIT_FOR_RESET_TIME1
 
 int intel_selftest_modify_policy(struct intel_engine_cs *engine,
 struct intel_selftest_saved_policy *saved,
-- 
2.28.0



[PATCH 51/51] drm/i915/guc: Unblock GuC submission on Gen11+

2021-07-16 Thread Matthew Brost
From: Daniele Ceraolo Spurio 

Unblock GuC submission on Gen11+ platforms.

v2:
 (Martin Peres / John H)
  - Delete debug message when GuC is disabled by default on certain
platforms

Signed-off-by: Michal Wajdeczko 
Signed-off-by: Daniele Ceraolo Spurio 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h |  3 +--
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 -
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index eb6062f95d3b..5d94cf482516 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -55,6 +55,7 @@ struct intel_guc {
struct ida guc_ids;
struct list_head guc_id_list;
 
+   bool submission_supported;
bool submission_selected;
 
struct i915_vma *ads_vma;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 263ad6a9e4a9..32269a22562e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2512,6 +2512,13 @@ void intel_guc_submission_disable(struct intel_guc *guc)
/* Note: By the time we're here, GuC may have already been reset */
 }
 
+static bool __guc_submission_supported(struct intel_guc *guc)
+{
+   /* GuC submission is unavailable for pre-Gen11 */
+   return intel_guc_is_supported(guc) &&
+  GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+}
+
 static bool __guc_submission_selected(struct intel_guc *guc)
 {
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -2524,6 +2531,7 @@ static bool __guc_submission_selected(struct intel_guc 
*guc)
 
 void intel_guc_submission_init_early(struct intel_guc *guc)
 {
+   guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 03bc1c83a4d2..c7ef44fa0c36 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -38,8 +38,7 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 
 static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
 {
-   /* XXX: GuC submission is unavailable for now */
-   return false;
+   return guc->submission_supported;
 }
 
 static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 7a69c3c027e9..da57d18d9f6b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -34,8 +34,14 @@ static void uc_expand_default_options(struct intel_uc *uc)
return;
}
 
-   /* Default: enable HuC authentication only */
-   i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
+   /* Intermediate platforms are HuC authentication only */
+   if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) {
+   i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
+   return;
+   }
+
+   /* Default: enable HuC authentication and GuC submission */
+   i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
 }
 
 /* Reset GuC providing us with fresh state for both GuC and HuC.
@@ -313,9 +319,6 @@ static int __uc_init(struct intel_uc *uc)
if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
return -ENOMEM;
 
-   /* XXX: GuC submission is unavailable for now */
-   GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
-
ret = intel_guc_init(guc);
if (ret)
return ret;
-- 
2.28.0



[PATCH 50/51] drm/i915/guc: Implement GuC priority management

2021-07-16 Thread Matthew Brost
Implement a simple static mapping algorithm of the i915 priority levels
(int, -1k to 1k exposed to user) to the 4 GuC levels. Mapping is as
follows:

i915 level < 0  -> GuC low level (3)
i915 level == 0 -> GuC normal level  (2)
i915 level < INT_MAX-> GuC high level(1)
i915 level == INT_MAX   -> GuC highest level (0)

We believe this mapping should cover the UMD use cases (3 distinct user
levels + 1 kernel level).

In addition to static mapping, a simple counter system is attached to
each context tracking the number of requests inflight on the context at
each level. This is needed as the GuC levels are per context while the
i915 levels are per request.

Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   |   3 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   9 +-
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |   4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 207 +-
 drivers/gpu/drm/i915/i915_request.c   |   5 +
 drivers/gpu/drm/i915/i915_request.h   |   8 +
 drivers/gpu/drm/i915/i915_scheduler.c |   7 +
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  12 +
 drivers/gpu/drm/i915/i915_trace.h |  16 +-
 include/uapi/drm/i915_drm.h   |   9 +
 10 files changed, 274 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 2007dc6f6b99..209cf265bf74 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -245,6 +245,9 @@ static void signal_irq_work(struct irq_work *work)
llist_entry(signal, typeof(*rq), signal_node);
struct list_head cb_list;
 
+   if (rq->engine->sched_engine->retire_inflight_request_prio)
+   
rq->engine->sched_engine->retire_inflight_request_prio(rq);
+
spin_lock(>lock);
list_replace(>fence.cb_list, _list);
__dma_fence_signal__timestamp(>fence, timestamp);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 005a64f2afa7..fe51c2d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -18,8 +18,9 @@
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
 
-#define CONTEXT_REDZONE POISON_INUSE
+#include "uc/intel_guc_fwif.h"
 
+#define CONTEXT_REDZONE POISON_INUSE
 DECLARE_EWMA(runtime, 3, 8);
 
 struct i915_gem_context;
@@ -191,6 +192,12 @@ struct intel_context {
 
/* GuC context blocked fence */
struct i915_sw_fence guc_blocked;
+
+   /*
+* GuC priority management
+*/
+   u8 guc_prio;
+   u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM];
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 84142127ebd8..8f8bea08e734 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -11,6 +11,7 @@
 #include "intel_engine.h"
 #include "intel_engine_user.h"
 #include "intel_gt.h"
+#include "uc/intel_guc_submission.h"
 
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
@@ -115,6 +116,9 @@ static void set_scheduler_caps(struct drm_i915_private 
*i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
 I915_SCHEDULER_CAP_PRIORITY);
 
+   if (intel_uc_uses_guc_submission(>gt.uc))
+   enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
+
for (i = 0; i < ARRAY_SIZE(map); i++) {
if (engine->flags & BIT(map[i].engine))
enabled |= BIT(map[i].sched);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 536fdbc406c6..263ad6a9e4a9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -81,7 +81,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
  */
 #define SCHED_STATE_NO_LOCK_ENABLEDBIT(0)
 #define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
-#define SCHED_STATE_NO_LOCK_BLOCKED_SHIFT  2
+#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
+#define SCHED_STATE_NO_LOCK_BLOCKED_SHIFT  3
 #define SCHED_STATE_NO_LOCK_BLOCKED \
BIT(SCHED_STATE_NO_LOCK_BLOCKED_SHIFT)
 #define SCHED_STATE_NO_LOCK_BLOCKED_MASK \
@@ -142,6 +143,24 @@ static inline void decr_context_blocked(struct 
intel_context *ce)
   >guc_sched_state_no_lock);
 }
 
+static inline bool context_registered(struct intel_context *ce)
+{
+   return 

[PATCH 45/51] drm/i915/selftest: Fix workarounds selftest for GuC submission

2021-07-16 Thread Matthew Brost
From: Rahul Kumar Singh 

When GuC submission is enabled, the GuC controls engine resets. Rather
than explicitly triggering a reset, the driver must submit a hanging
context to GuC and wait for the reset to occur.

Signed-off-by: Rahul Kumar Singh 
Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
Cc: Matthew Brost 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gt/selftest_workarounds.c| 130 +-
 .../i915/selftests/intel_scheduler_helpers.c  |  76 ++
 .../i915/selftests/intel_scheduler_helpers.h  |  28 
 4 files changed, 201 insertions(+), 34 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 10b3bb6207ba..ab7679957623 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
gem/selftests/i915_gem_client_blt.o \
gem/selftests/igt_gem_utils.o \
+   selftests/intel_scheduler_helpers.o \
selftests/i915_random.o \
selftests/i915_selftest.o \
selftests/igt_atomic.o \
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c 
b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 7ebc4edb8ecf..7727bc531ea9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -12,6 +12,7 @@
 #include "selftests/igt_flush_test.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
+#include "selftests/intel_scheduler_helpers.h"
 #include "selftests/mock_drm.h"
 
 #include "gem/selftests/igt_gem_utils.h"
@@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine)
return intel_engine_reset(engine, "live_workarounds");
 }
 
+static int do_guc_reset(struct intel_engine_cs *engine)
+{
+   /* Currently a no-op as the reset is handled by GuC */
+   return 0;
+}
+
 static int
 switch_to_scratch_context(struct intel_engine_cs *engine,
- struct igt_spinner *spin)
+ struct igt_spinner *spin,
+ struct i915_request **rq)
 {
struct intel_context *ce;
-   struct i915_request *rq;
int err = 0;
 
ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
 
-   rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+   *rq = igt_spinner_create_request(spin, ce, MI_NOOP);
intel_context_put(ce);
 
-   if (IS_ERR(rq)) {
+   if (IS_ERR(*rq)) {
spin = NULL;
-   err = PTR_ERR(rq);
+   err = PTR_ERR(*rq);
goto err;
}
 
-   err = request_add_spin(rq, spin);
+   err = request_add_spin(*rq, spin);
 err:
if (err && spin)
igt_spinner_end(spin);
@@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct 
intel_engine_cs *engine,
 {
struct intel_context *ce, *tmp;
struct igt_spinner spin;
+   struct i915_request *rq;
intel_wakeref_t wakeref;
int err;
 
@@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct 
intel_engine_cs *engine,
goto out_spin;
}
 
-   err = switch_to_scratch_context(engine, );
+   err = switch_to_scratch_context(engine, , );
if (err)
goto out_spin;
 
+   /* Ensure the spinner hasn't aborted */
+   if (i915_request_completed(rq)) {
+   pr_err("%s spinner failed to start\n", name);
+   err = -ETIMEDOUT;
+   goto out_spin;
+   }
+
with_intel_runtime_pm(engine->uncore->rpm, wakeref)
err = reset(engine);
 
+   /* Ensure the reset happens and kills the engine */
+   if (err == 0)
+   err = intel_selftest_wait_for_rq(rq);
+
igt_spinner_end();
 
if (err) {
@@ -787,9 +806,26 @@ static int live_reset_whitelist(void *arg)
continue;
 
if (intel_has_reset_engine(gt)) {
-   err = check_whitelist_across_reset(engine,
-  do_engine_reset,
-  "engine");
+   if (intel_engine_uses_guc(engine)) {
+   struct intel_selftest_saved_policy saved;
+   int err2;
+
+   err = intel_selftest_modify_policy(engine, 
);
+   if(err)
+   goto out;
+
+   err = check_whitelist_across_reset(engine,
+  

[PATCH 47/51] drm/i915/selftest: Increase some timeouts in live_requests

2021-07-16 Thread Matthew Brost
Requests may take slightly longer with GuC submission, let's increase
the timeouts in live_requests.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/selftests/i915_request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c 
b/drivers/gpu/drm/i915/selftests/i915_request.c
index bd5c96a77ba3..d67710d10615 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -1313,7 +1313,7 @@ static int __live_parallel_engine1(void *arg)
i915_request_add(rq);
 
err = 0;
-   if (i915_request_wait(rq, 0, HZ / 5) < 0)
+   if (i915_request_wait(rq, 0, HZ) < 0)
err = -ETIME;
i915_request_put(rq);
if (err)
@@ -1419,7 +1419,7 @@ static int __live_parallel_spin(void *arg)
}
igt_spinner_end();
 
-   if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
+   if (err == 0 && i915_request_wait(rq, 0, HZ) < 0)
err = -EIO;
i915_request_put(rq);
 
-- 
2.28.0



[PATCH 43/51] drm/i915/guc: Support request cancellation

2021-07-16 Thread Matthew Brost
This adds GuC backend support for i915_request_cancel(), which in turn
makes CONFIG_DRM_I915_REQUEST_TIMEOUT work.

Signed-off-by: Matthew Brost 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   9 +
 drivers/gpu/drm/i915/gt/intel_context.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
 .../drm/i915/gt/intel_execlists_submission.c  |  18 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 169 ++
 drivers/gpu/drm/i915/i915_request.c   |  14 +-
 6 files changed, 211 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index dd078a80c3a3..b1e3d00fb1f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -366,6 +366,12 @@ static int __intel_context_active(struct i915_active 
*active)
return 0;
 }
 
+static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
+enum i915_sw_fence_notify state)
+{
+   return NOTIFY_DONE;
+}
+
 void
 intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 {
@@ -399,6 +405,9 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
ce->guc_id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(>guc_id_link);
 
+   i915_sw_fence_init(>guc_blocked, sw_fence_dummy_notify);
+   i915_sw_fence_commit(>guc_blocked);
+
i915_active_init(>active,
 __intel_context_active, __intel_context_retire, 0);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 814d9277096a..876bdb08303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -70,6 +70,13 @@ intel_context_is_pinned(struct intel_context *ce)
return atomic_read(>pin_count);
 }
 
+static inline void intel_context_cancel_request(struct intel_context *ce,
+   struct i915_request *rq)
+{
+   GEM_BUG_ON(!ce->ops->cancel_request);
+   return ce->ops->cancel_request(ce, rq);
+}
+
 /**
  * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' 
status
  * @ce - the context
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 57c19ee3e313..005a64f2afa7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,6 +13,7 @@
 #include 
 
 #include "i915_active_types.h"
+#include "i915_sw_fence.h"
 #include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
@@ -42,6 +43,9 @@ struct intel_context_ops {
void (*unpin)(struct intel_context *ce);
void (*post_unpin)(struct intel_context *ce);
 
+   void (*cancel_request)(struct intel_context *ce,
+  struct i915_request *rq);
+
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
 
@@ -184,6 +188,9 @@ struct intel_context {
 * GuC ID link - in list when unpinned but guc_id still valid in GuC
 */
struct list_head guc_id_link;
+
+   /* GuC context blocked fence */
+   struct i915_sw_fence guc_blocked;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f9b5f54a5abe..8f6dc0fb49a6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -114,6 +114,7 @@
 #include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_stats.h"
 #include "intel_execlists_submission.h"
@@ -2536,11 +2537,26 @@ static int execlists_context_alloc(struct intel_context 
*ce)
return lrc_alloc(ce, ce->engine);
 }
 
+static void execlists_context_cancel_request(struct intel_context *ce,
+struct i915_request *rq)
+{
+   struct intel_engine_cs *engine = NULL;
+
+   i915_request_active_engine(rq, );
+
+   if (engine && intel_engine_pulse(engine))
+   intel_gt_handle_error(engine->gt, engine->mask, 0,
+ "request cancellation by %s",
+ current->comm);
+}
+
 static const struct intel_context_ops execlists_context_ops = {
.flags = COPS_HAS_INFLIGHT,
 
.alloc = execlists_context_alloc,
 
+   .cancel_request = execlists_context_cancel_request,
+
.pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
.unpin = lrc_unpin,
@@ -3558,6 +3574,8 @@ static const struct intel_context_ops virtual_context_ops 
= {
 
.alloc = virtual_context_alloc,
 
+   .cancel_request = 

[PATCH 44/51] drm/i915/selftest: Better error reporting from hangcheck selftest

2021-07-16 Thread Matthew Brost
From: John Harrison 

There are many ways in which the hangcheck selftest can fail. Very few
of them actually printed an error message to say what happened. So,
fill in the missing messages.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 89 
 1 file changed, 72 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 7aea10aa1fb4..0ed87cc4d063 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -378,6 +378,7 @@ static int igt_reset_nop(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
+   pr_err("[%s] Create context failed: %d!\n", 
engine->name, err);
break;
}
 
@@ -387,6 +388,7 @@ static int igt_reset_nop(void *arg)
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
+   pr_err("[%s] Create request failed: 
%d!\n", engine->name, err);
break;
}
 
@@ -401,24 +403,31 @@ static int igt_reset_nop(void *arg)
igt_global_reset_unlock(gt);
 
if (intel_gt_is_wedged(gt)) {
+   pr_err("[%s] GT is wedged!\n", engine->name);
err = -EIO;
break;
}
 
if (i915_reset_count(global) != reset_count + ++count) {
-   pr_err("Full GPU reset not recorded!\n");
+   pr_err("[%s] Reset not recorded: %d vs %d + %d!\n",
+  engine->name, i915_reset_count(global), 
reset_count, count);
err = -EINVAL;
break;
}
 
err = igt_flush_test(gt->i915);
-   if (err)
+   if (err) {
+   pr_err("[%s] Flush failed: %d!\n", engine->name, err);
break;
+   }
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
 
-   if (igt_flush_test(gt->i915))
+   if (igt_flush_test(gt->i915)) {
+   pr_err("Post flush failed: %d!\n", err);
err = -EIO;
+   }
+
return err;
 }
 
@@ -441,8 +450,10 @@ static int igt_reset_nop_engine(void *arg)
int err;
 
ce = intel_context_create(engine);
-   if (IS_ERR(ce))
+   if (IS_ERR(ce)) {
+   pr_err("[%s] Create context failed: %d!\n", 
engine->name, err);
return PTR_ERR(ce);
+   }
 
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
@@ -550,8 +561,10 @@ static int igt_reset_fail_engine(void *arg)
int err;
 
ce = intel_context_create(engine);
-   if (IS_ERR(ce))
+   if (IS_ERR(ce)) {
+   pr_err("[%s] Create context failed: %d!\n", 
engine->name, err);
return PTR_ERR(ce);
+   }
 
st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, >reset.flags);
@@ -711,6 +724,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
active)
rq = hang_create_request(, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
+   pr_err("[%s] Create hang request 
failed: %d!\n", engine->name, err);
break;
}
 
@@ -765,12 +779,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
active)
break;
 
err = igt_flush_test(gt->i915);
-   if (err)
+   if (err) {
+   pr_err("[%s] Flush failed: %d!\n", engine->name, err);
break;
+   }
}
 
-   if (intel_gt_is_wedged(gt))
+   if (intel_gt_is_wedged(gt)) {
+   pr_err("GT is wedged!\n");
err = -EIO;
+   }
 
if (active)
hang_fini();
@@ -837,6 +855,7 @@ static int active_engine(void *data)
ce[count] = intel_context_create(engine);
if (IS_ERR(ce[count])) {
err = PTR_ERR(ce[count]);
+   pr_err("[%s] Create context #%ld failed: %d!\n", 
engine->name, count, err);
   

[PATCH 14/51] drm/i915/guc: Ensure G2H response has space in buffer

2021-07-16 Thread Matthew Brost
Ensure G2H response has space in the buffer before sending H2G CTB as
the GuC can't handle any backpressure on the G2H interface.

v2:
 (Matthew)
  - s/INTEL_GUC_SEND/INTEL_GUC_CT_SEND
v3:
 (Matthew)
  - Add G2H credit accounting to blocking path, add g2h_release_space
helper
 (John H)
  - CTB_G2H_BUFFER_SIZE / 4 == G2H_ROOM_BUFFER_SIZE

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  8 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 91 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  9 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++-
 5 files changed, 99 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 03b7222b04a2..80b88bae5f24 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -96,10 +96,11 @@ inline int intel_guc_send(struct intel_guc *guc, const u32 
*action, u32 len)
 }
 
 static
-inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len)
+inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len,
+u32 g2h_len_dw)
 {
return intel_guc_ct_send(>ct, action, len, NULL, 0,
-INTEL_GUC_CT_SEND_NB);
+MAKE_SEND_FLAGS(g2h_len_dw));
 }
 
 static inline int
@@ -113,6 +114,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 
*action, u32 len,
 static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
   const u32 *action,
   u32 len,
+  u32 g2h_len_dw,
   bool loop)
 {
int err;
@@ -123,7 +125,7 @@ static inline int intel_guc_send_busy_loop(struct 
intel_guc* guc,
might_sleep_if(loop && not_atomic);
 
 retry:
-   err = intel_guc_send_nb(guc, action, len);
+   err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
if (unlikely(err == -EBUSY && loop)) {
if (likely(not_atomic)) {
if (msleep_interruptible(sleep_period_ms))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 019b25ff1888..c33906ec478d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -73,6 +73,7 @@ static inline struct drm_device *ct_to_drm(struct 
intel_guc_ct *ct)
 #define CTB_DESC_SIZE  ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
 #define CTB_H2G_BUFFER_SIZE(SZ_4K)
 #define CTB_G2H_BUFFER_SIZE(4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE   (CTB_G2H_BUFFER_SIZE / 4)
 
 struct ct_request {
struct list_head link;
@@ -129,23 +130,27 @@ static void guc_ct_buffer_desc_init(struct 
guc_ct_buffer_desc *desc)
 
 static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
 {
+   u32 space;
+
ctb->broken = false;
ctb->tail = 0;
ctb->head = 0;
-   ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
+   space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space;
+   atomic_set(>space, space);
 
guc_ct_buffer_desc_init(ctb->desc);
 }
 
 static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,
   struct guc_ct_buffer_desc *desc,
-  u32 *cmds, u32 size_in_bytes)
+  u32 *cmds, u32 size_in_bytes, u32 resv_space)
 {
GEM_BUG_ON(size_in_bytes % 4);
 
ctb->desc = desc;
ctb->cmds = cmds;
ctb->size = size_in_bytes / 4;
+   ctb->resv_space = resv_space / 4;
 
guc_ct_buffer_reset(ctb);
 }
@@ -226,6 +231,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
struct guc_ct_buffer_desc *desc;
u32 blob_size;
u32 cmds_size;
+   u32 resv_space;
void *blob;
u32 *cmds;
int err;
@@ -250,19 +256,23 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
desc = blob;
cmds = blob + 2 * CTB_DESC_SIZE;
cmds_size = CTB_H2G_BUFFER_SIZE;
-   CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send",
-ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size);
+   resv_space = 0;
+   CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send",
+ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size,
+resv_space);
 
-   guc_ct_buffer_init(>ctbs.send, desc, cmds, cmds_size);
+   guc_ct_buffer_init(>ctbs.send, desc, cmds, cmds_size, resv_space);
 
/* store pointers to desc and cmds for recv ctb */
desc = blob + CTB_DESC_SIZE;
cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE;
cmds_size = 

[PATCH 36/51] drm/i915/guc: Capture error state on context reset

2021-07-16 Thread Matthew Brost
We receive notification of an engine reset from GuC at its
completion. Meaning GuC has potentially cleared any HW state
we may have been interested in capturing. GuC resumes scheduling
on the engine post-reset, as the resets are meant to be transparent,
further muddling our error state.

There is ongoing work to define an API for a GuC debug state dump. The
suggestion for now is to manually disable FW initiated resets in cases
where debug state is needed.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 20 +++
 drivers/gpu/drm/i915/gt/intel_context.h   |  3 ++
 drivers/gpu/drm/i915/gt/intel_engine.h| 21 ++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 --
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  2 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 35 +--
 drivers/gpu/drm/i915/i915_gpu_error.c | 25 ++---
 7 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index bfb05d8697d1..dd078a80c3a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -515,6 +515,26 @@ struct i915_request *intel_context_create_request(struct 
intel_context *ce)
return rq;
 }
 
+struct i915_request *intel_context_find_active_request(struct intel_context 
*ce)
+{
+   struct i915_request *rq, *active = NULL;
+   unsigned long flags;
+
+   GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
+
+   spin_lock_irqsave(>guc_active.lock, flags);
+   list_for_each_entry_reverse(rq, >guc_active.requests,
+   sched.link) {
+   if (i915_request_completed(rq))
+   break;
+
+   active = rq;
+   }
+   spin_unlock_irqrestore(>guc_active.lock, flags);
+
+   return active;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 974ef85320c2..2ed9bf5f91a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -200,6 +200,9 @@ int intel_context_prepare_remote_request(struct 
intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce);
 
+struct i915_request *
+intel_context_find_active_request(struct intel_context *ce);
+
 static inline bool intel_context_is_barrier(const struct intel_context *ce)
 {
return test_bit(CONTEXT_BARRIER_BIT, >flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index edbde6171bca..8b5425612e8b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -245,7 +245,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs 
*engine,
   ktime_t *now);
 
 struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
+intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
 
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 struct intel_context *
@@ -310,4 +310,23 @@ intel_engine_get_sibling(struct intel_engine_cs *engine, 
unsigned int sibling)
return engine->cops->get_sibling(engine, sibling);
 }
 
+static inline void
+intel_engine_set_hung_context(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{
+   engine->hung_ce = ce;
+}
+
+static inline void
+intel_engine_clear_hung_context(struct intel_engine_cs *engine)
+{
+   intel_engine_set_hung_context(engine, NULL);
+}
+
+static inline struct intel_context *
+intel_engine_get_hung_context(struct intel_engine_cs *engine)
+{
+   return engine->hung_ce;
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d95d666407f5..c1f2e57aa789 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1672,7 +1672,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tRequests:\n");
 
spin_lock_irqsave(>sched_engine->lock, flags);
-   rq = intel_engine_find_active_request(engine);
+   rq = intel_engine_execlist_find_hung_request(engine);
if (rq) {
struct intel_timeline *tl = get_timeline(rq);
 
@@ -1783,10 +1783,17 @@ static bool match_ring(struct i915_request *rq)
 }
 
 struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
+intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
 {
struct i915_request *request, *active = NULL;
 
+   /*
+* This search does not work in GuC submission mode. However, the GuC
+* will report the hanging context directly to the driver 

[PATCH 37/51] drm/i915/guc: Fix for error capture after full GPU reset with GuC

2021-07-16 Thread Matthew Brost
From: John Harrison 

In the case of a full GPU reset (e.g. because GuC has died or because
GuC's hang detection has been disabled), the driver can't rely on GuC
reporting the guilty context. Instead, the driver needs to scan all
active contexts and find one that is currently executing, as per the
execlist mode behaviour. In GuC mode, this scan is different to
execlist mode as the active request list is handled very differently.

Similarly, the request state dump in debugfs needs to be handled
differently when in GuC submission mode.

Also refactored some of the request scanning code to avoid duplication
across the multiple code paths that are now replicating it.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_engine.h|   3 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 139 --
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   8 +
 drivers/gpu/drm/i915/gt/intel_reset.c |   2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  67 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |   3 +
 drivers/gpu/drm/i915/i915_request.c   |  41 ++
 drivers/gpu/drm/i915/i915_request.h   |  11 ++
 9 files changed, 229 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 8b5425612e8b..2310ccda8058 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -240,6 +240,9 @@ __printf(3, 4)
 void intel_engine_dump(struct intel_engine_cs *engine,
   struct drm_printer *m,
   const char *header, ...);
+void intel_engine_dump_active_requests(struct list_head *requests,
+  struct i915_request *hung_rq,
+  struct drm_printer *m);
 
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
   ktime_t *now);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c1f2e57aa789..51a0d860d551 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1625,6 +1625,97 @@ static void print_properties(struct intel_engine_cs 
*engine,
   read_ul(>defaults, p->offset));
 }
 
+static void engine_dump_request(struct i915_request *rq, struct drm_printer 
*m, const char *msg)
+{
+   struct intel_timeline *tl = get_timeline(rq);
+
+   i915_request_show(m, rq, msg, 0);
+
+   drm_printf(m, "\t\tring->start:  0x%08x\n",
+  i915_ggtt_offset(rq->ring->vma));
+   drm_printf(m, "\t\tring->head:   0x%08x\n",
+  rq->ring->head);
+   drm_printf(m, "\t\tring->tail:   0x%08x\n",
+  rq->ring->tail);
+   drm_printf(m, "\t\tring->emit:   0x%08x\n",
+  rq->ring->emit);
+   drm_printf(m, "\t\tring->space:  0x%08x\n",
+  rq->ring->space);
+
+   if (tl) {
+   drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
+  tl->hwsp_offset);
+   intel_timeline_put(tl);
+   }
+
+   print_request_ring(m, rq);
+
+   if (rq->context->lrc_reg_state) {
+   drm_printf(m, "Logical Ring Context:\n");
+   hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
+   }
+}
+
+void intel_engine_dump_active_requests(struct list_head *requests,
+  struct i915_request *hung_rq,
+  struct drm_printer *m)
+{
+   struct i915_request *rq;
+   const char *msg;
+   enum i915_request_state state;
+
+   list_for_each_entry(rq, requests, sched.link) {
+   if (rq == hung_rq)
+   continue;
+
+   state = i915_test_request_state(rq);
+   if (state < I915_REQUEST_QUEUED)
+   continue;
+
+   if (state == I915_REQUEST_ACTIVE)
+   msg = "\t\tactive on engine";
+   else
+   msg = "\t\tactive in queue";
+
+   engine_dump_request(rq, m, msg);
+   }
+}
+
+static void engine_dump_active_requests(struct intel_engine_cs *engine, struct 
drm_printer *m)
+{
+   struct i915_request *hung_rq = NULL;
+   struct intel_context *ce;
+   bool guc;
+
+   /*
+* No need for an engine->irq_seqno_barrier() before the seqno reads.
+* The GPU is still running so requests are still executing and any
+* hardware reads will be out of date by the time they are reported.
+* But the intention here is just to report an instantaneous snapshot
+* so that's fine.
+*/
+   lockdep_assert_held(>sched_engine->lock);
+
+   drm_printf(m, "\tRequests:\n");
+
+   guc = 

[PATCH 32/51] drm/i915/guc: Enable the timer expired interrupt for GuC

2021-07-16 Thread Matthew Brost
The GuC can implement execution quanta, detect hung contexts and
other such things but it requires the timer expired interrupt to do so.

Signed-off-by: Matthew Brost 
CC: John Harrison 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 06e9a8ed4e03..0c8e7f2b06f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1877,6 +1877,10 @@ void intel_rps_init(struct intel_rps *rps)
 
if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+
+   /* GuC needs ARAT expired interrupt unmasked */
+   if (intel_uc_uses_guc_submission(_to_gt(rps)->uc))
+   rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
 }
 
 void intel_rps_sanitize(struct intel_rps *rps)
-- 
2.28.0



[PATCH 19/51] drm/i915/guc: GuC virtual engines

2021-07-16 Thread Matthew Brost
Implement GuC virtual engines. Rather simple implementation, basically
just allocate an engine, setup context enter / exit function to virtual
engine specific functions, set all other variables / functions to guc
versions, and set the engine mask to that of all the siblings.

v2: Update to work with proto-ctx

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   1 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   6 +
 drivers/gpu/drm/i915/gt/intel_engine.h|  27 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  14 +
 .../drm/i915/gt/intel_execlists_submission.c  |  29 ++-
 .../drm/i915/gt/intel_execlists_submission.h  |   4 -
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  12 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 240 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |   2 +
 10 files changed, 308 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 64659802d4df..edefe299bd76 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -74,7 +74,6 @@
 #include "gt/intel_context_param.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
-#include "gt/intel_execlists_submission.h" /* virtual_engine */
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_ring.h"
 
@@ -363,9 +362,6 @@ set_proto_ctx_engines_balance(struct i915_user_extension 
__user *base,
if (!HAS_EXECLISTS(i915))
return -ENODEV;
 
-   if (intel_uc_uses_guc_submission(>gt.uc))
-   return -ENODEV; /* not implement yet */
-
if (get_user(idx, >engine_index))
return -EFAULT;
 
@@ -950,8 +946,8 @@ static struct i915_gem_engines *user_engines(struct 
i915_gem_context *ctx,
break;
 
case I915_GEM_ENGINE_TYPE_BALANCED:
-   ce = intel_execlists_create_virtual(pe[n].siblings,
-   pe[n].num_siblings);
+   ce = intel_engine_create_virtual(pe[n].siblings,
+pe[n].num_siblings);
break;
 
case I915_GEM_ENGINE_TYPE_INVALID:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 20411db84914..2639c719a7a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -10,6 +10,7 @@
 #include "i915_gem_context_types.h"
 
 #include "gt/intel_context.h"
+#include "gt/intel_engine.h"
 
 #include "i915_drv.h"
 #include "i915_gem.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4a5518d295c2..542c98418771 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -47,6 +47,12 @@ struct intel_context_ops {
 
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
+
+   /* virtual engine/context interface */
+   struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
+   unsigned int count);
+   struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
+  unsigned int sibling);
 };
 
 struct intel_context {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index f911c1224ab2..9fec0aca5f4b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -273,13 +273,38 @@ intel_engine_has_preempt_reset(const struct 
intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
 }
 
+struct intel_context *
+intel_engine_create_virtual(struct intel_engine_cs **siblings,
+   unsigned int count);
+
+static inline bool
+intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
+{
+   if (intel_engine_uses_guc(engine))
+   return intel_guc_virtual_engine_has_heartbeat(engine);
+   else
+   GEM_BUG_ON("Only should be called in GuC submission");
+
+   return false;
+}
+
 static inline bool
 intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
 {
if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
return false;
 
-   return READ_ONCE(engine->props.heartbeat_interval_ms);
+   if (intel_engine_is_virtual(engine))
+   return intel_virtual_engine_has_heartbeat(engine);
+   else
+   return READ_ONCE(engine->props.heartbeat_interval_ms);
+}
+
+static inline struct intel_engine_cs *
+intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int 

[PATCH 34/51] drm/i915/guc: Don't complain about reset races

2021-07-16 Thread Matthew Brost
From: John Harrison 

It is impossible to seal all race conditions of resets occurring
concurrently with other operations. At least, not without introducing
excessive mutex locking. Instead, don't complain if it occurs. In
particular, don't complain if trying to send a H2G during a reset.
Whatever the H2G was about should get redone once the reset is over.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 5 -
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 +++
 drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index d16381784ee2..92976d205478 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -757,7 +757,10 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 
*action, u32 len,
int ret;
 
if (unlikely(!ct->enabled)) {
-   WARN(1, "Unexpected send: action=%#x\n", *action);
+   struct intel_guc *guc = ct_to_guc(ct);
+   struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+
+   WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", 
*action);
return -ENODEV;
}
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index b523a8521351..77c1fe2ed883 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -550,6 +550,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
 {
struct intel_guc *guc = >guc;
 
+   uc->reset_in_progress = true;
 
/* Nothing to do if GuC isn't supported */
if (!intel_uc_supports_guc(uc))
@@ -579,6 +580,8 @@ void intel_uc_reset_finish(struct intel_uc *uc)
 {
struct intel_guc *guc = >guc;
 
+   uc->reset_in_progress = false;
+
/* Firmware expected to be running when this function is called */
if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_finish(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index eaa3202192ac..91315e3f1c58 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -30,6 +30,8 @@ struct intel_uc {
 
/* Snapshot of GuC log from last failed load */
struct drm_i915_gem_object *load_err_log;
+
+   bool reset_in_progress;
 };
 
 void intel_uc_init_early(struct intel_uc *uc);
-- 
2.28.0



[PATCH 27/51] drm/i915: Reset GPU immediately if submission is disabled

2021-07-16 Thread Matthew Brost
If submission is disabled by the backend for any reason, reset the GPU
immediately in the heartbeat code as the backend can't be reenabled
until the GPU is reset.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 63 +++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  4 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  9 +++
 drivers/gpu/drm/i915/i915_scheduler.c |  6 ++
 drivers/gpu/drm/i915/i915_scheduler.h |  6 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  5 ++
 6 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index b6a305e6a974..a8495364d906 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -70,12 +70,30 @@ static void show_heartbeat(const struct i915_request *rq,
 {
struct drm_printer p = drm_debug_printer("heartbeat");
 
-   intel_engine_dump(engine, ,
- "%s heartbeat {seqno:%llx:%lld, prio:%d} not 
ticking\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- rq->sched.attr.priority);
+   if (!rq) {
+   intel_engine_dump(engine, ,
+ "%s heartbeat not ticking\n",
+ engine->name);
+   } else {
+   intel_engine_dump(engine, ,
+ "%s heartbeat {seqno:%llx:%lld, prio:%d} not 
ticking\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno,
+ rq->sched.attr.priority);
+   }
+}
+
+static void
+reset_engine(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+   if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+   show_heartbeat(rq, engine);
+
+   intel_gt_handle_error(engine->gt, engine->mask,
+ I915_ERROR_CAPTURE,
+ "stopped heartbeat on %s",
+ engine->name);
 }
 
 static void heartbeat(struct work_struct *wrk)
@@ -102,6 +120,11 @@ static void heartbeat(struct work_struct *wrk)
if (intel_gt_is_wedged(engine->gt))
goto out;
 
+   if (i915_sched_engine_disabled(engine->sched_engine)) {
+   reset_engine(engine, engine->heartbeat.systole);
+   goto out;
+   }
+
if (engine->heartbeat.systole) {
long delay = READ_ONCE(engine->props.heartbeat_interval_ms);
 
@@ -139,13 +162,7 @@ static void heartbeat(struct work_struct *wrk)
engine->sched_engine->schedule(rq, );
local_bh_enable();
} else {
-   if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-   show_heartbeat(rq, engine);
-
-   intel_gt_handle_error(engine->gt, engine->mask,
- I915_ERROR_CAPTURE,
- "stopped heartbeat on %s",
- engine->name);
+   reset_engine(engine, rq);
}
 
rq->emitted_jiffies = jiffies;
@@ -194,6 +211,26 @@ void intel_engine_park_heartbeat(struct intel_engine_cs 
*engine)
i915_request_put(fetch_and_zero(>heartbeat.systole));
 }
 
+void intel_gt_unpark_heartbeats(struct intel_gt *gt)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, gt, id)
+   if (intel_engine_pm_is_awake(engine))
+   intel_engine_unpark_heartbeat(engine);
+
+}
+
+void intel_gt_park_heartbeats(struct intel_gt *gt)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, gt, id)
+   intel_engine_park_heartbeat(engine);
+}
+
 void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
 {
INIT_DELAYED_WORK(>heartbeat.work, heartbeat);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index a488ea3e84a3..5da6d809a87a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -7,6 +7,7 @@
 #define INTEL_ENGINE_HEARTBEAT_H
 
 struct intel_engine_cs;
+struct intel_gt;
 
 void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
 
@@ -16,6 +17,9 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
 void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
 
+void intel_gt_park_heartbeats(struct intel_gt *gt);
+void 

[PATCH 17/51] drm/i915/guc: Add several request trace points

2021-07-16 Thread Matthew Brost
Add trace points for request dependencies and GuC submit. Extended
existing request trace points to include submit fence value, guc_id,
and ring tail value.

v2: Fix white space alignment in i915_request_add trace point

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  3 ++
 drivers/gpu/drm/i915/i915_request.c   |  3 ++
 drivers/gpu/drm/i915/i915_trace.h | 43 +--
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a2af7e17dcc2..480fb2184ecf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -417,6 +417,7 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
guc->stalled_request = last;
return false;
}
+   trace_i915_request_guc_submit(last);
}
 
guc->stalled_request = NULL;
@@ -637,6 +638,8 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc,
ret = guc_add_request(guc, rq);
if (ret == -EBUSY)
guc->stalled_request = rq;
+   else
+   trace_i915_request_guc_submit(rq);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 2b2b63cba06c..01aa3d1ee2b1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1319,6 +1319,9 @@ __i915_request_await_execution(struct i915_request *to,
return err;
}
 
+   trace_i915_request_dep_to(to);
+   trace_i915_request_dep_from(from);
+
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->sched_engine->schedule) {
err = i915_sched_node_add_dependency(>sched,
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index 6778ad2a14a4..ea41d069bf7d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -794,30 +794,50 @@ DECLARE_EVENT_CLASS(i915_request,
TP_STRUCT__entry(
 __field(u32, dev)
 __field(u64, ctx)
+__field(u32, guc_id)
 __field(u16, class)
 __field(u16, instance)
 __field(u32, seqno)
+__field(u32, tail)
 ),
 
TP_fast_assign(
   __entry->dev = rq->engine->i915->drm.primary->index;
   __entry->class = rq->engine->uabi_class;
   __entry->instance = rq->engine->uabi_instance;
+  __entry->guc_id = rq->context->guc_id;
   __entry->ctx = rq->fence.context;
   __entry->seqno = rq->fence.seqno;
+  __entry->tail = rq->tail;
   ),
 
-   TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
+   TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, 
tail=%u",
  __entry->dev, __entry->class, __entry->instance,
- __entry->ctx, __entry->seqno)
+ __entry->guc_id, __entry->ctx, __entry->seqno,
+ __entry->tail)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
-   TP_PROTO(struct i915_request *rq),
-   TP_ARGS(rq)
+TP_PROTO(struct i915_request *rq),
+TP_ARGS(rq)
 );
 
 #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+DEFINE_EVENT(i915_request, i915_request_dep_to,
+TP_PROTO(struct i915_request *rq),
+TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_request, i915_request_dep_from,
+TP_PROTO(struct i915_request *rq),
+TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_request, i915_request_guc_submit,
+TP_PROTO(struct i915_request *rq),
+TP_ARGS(rq)
+);
+
 DEFINE_EVENT(i915_request, i915_request_submit,
 TP_PROTO(struct i915_request *rq),
 TP_ARGS(rq)
@@ -887,6 +907,21 @@ TRACE_EVENT(i915_request_out,
 
 #else
 #if !defined(TRACE_HEADER_MULTI_READ)
+static inline void
+trace_i915_request_dep_to(struct i915_request *rq)
+{
+}
+
+static inline void
+trace_i915_request_dep_from(struct i915_request *rq)
+{
+}
+
+static inline void
+trace_i915_request_guc_submit(struct i915_request *rq)
+{
+}
+
 static inline void
 trace_i915_request_submit(struct i915_request *rq)
 {
-- 
2.28.0



[PATCH 40/51] drm/i915/guc: Include scheduling policies in the debugfs state dump

2021-07-16 Thread Matthew Brost
From: John Harrison 

Added the scheduling policy parameters to the 'guc_info' debugfs state
dump.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 14 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h |  3 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index c6d0b762d82c..93b0ac35a508 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -92,6 +92,20 @@ static void guc_policies_init(struct intel_guc *guc, struct 
guc_policies *polici
policies->is_valid = 1;
 }
 
+void intel_guc_ads_print_policy_info(struct intel_guc *guc,
+struct drm_printer *dp)
+{
+   struct __guc_ads_blob *blob = guc->ads_blob;
+
+   if (unlikely(!blob))
+   return;
+
+   drm_printf(dp, "Global scheduling policies:\n");
+   drm_printf(dp, "  DPC promote time   = %u\n", 
blob->policies.dpc_promote_time);
+   drm_printf(dp, "  Max num work items = %u\n", 
blob->policies.max_num_work_items);
+   drm_printf(dp, "  Flags  = %u\n", 
blob->policies.global_flags);
+}
+
 static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
 {
u32 action[] = {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
index b00d3ae1113a..bdcb339a5321 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -7,9 +7,12 @@
 #define _INTEL_GUC_ADS_H_
 
 struct intel_guc;
+struct drm_printer;
 
 int intel_guc_ads_create(struct intel_guc *guc);
 void intel_guc_ads_destroy(struct intel_guc *guc);
 void intel_guc_ads_reset(struct intel_guc *guc);
+void intel_guc_ads_print_policy_info(struct intel_guc *guc,
+struct drm_printer *p);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 7a454c91a736..72ddfff42f7d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -10,6 +10,7 @@
 #include "intel_guc_debugfs.h"
 #include "intel_guc_log_debugfs.h"
 #include "gt/uc/intel_guc_ct.h"
+#include "gt/uc/intel_guc_ads.h"
 #include "gt/uc/intel_guc_submission.h"
 
 static int guc_info_show(struct seq_file *m, void *data)
@@ -29,6 +30,7 @@ static int guc_info_show(struct seq_file *m, void *data)
 
intel_guc_ct_print_info(>ct, );
intel_guc_submission_print_info(guc, );
+   intel_guc_ads_print_policy_info(guc, );
 
return 0;
 }
-- 
2.28.0



[PATCH 35/51] drm/i915/guc: Enable GuC engine reset

2021-07-16 Thread Matthew Brost
From: John Harrison 

Clear the 'disable resets' flag to allow GuC to reset hung contexts
(detected via pre-emption timeout).

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 9fd3c911f5fb..d3e86ab7508f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -81,8 +81,7 @@ static void guc_policies_init(struct guc_policies *policies)
 {
policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;
-   /* Disable automatic resets as not yet supported. */
-   policies->global_flags = GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+   policies->global_flags = 0;
policies->is_valid = 1;
 }
 
-- 
2.28.0



[PATCH 31/51] drm/i915/guc: Handle engine reset failure notification

2021-07-16 Thread Matthew Brost
GuC will notify the driver, via G2H, if it fails to
reset an engine. We recover by resorting to a full GPU
reset.

Signed-off-by: Matthew Brost 
Signed-off-by: Fernando Pacheco 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  3 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 43 +++
 3 files changed, 48 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f23a3a618550..7f14e1873010 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -264,6 +264,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
 const u32 *msg, u32 len);
 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
+const u32 *msg, u32 len);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index c4f9b44b9f86..d16381784ee2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -984,6 +984,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
struct ct_incoming_msg *r
case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
ret = intel_guc_context_reset_process_msg(guc, payload, len);
break;
+   case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+   ret = intel_guc_engine_failure_process_msg(guc, payload, len);
+   break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index feaf1ca61eaa..035633f567b5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2232,6 +2232,49 @@ int intel_guc_context_reset_process_msg(struct intel_guc 
*guc,
return 0;
 }
 
+static struct intel_engine_cs *
+guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
+{
+   struct intel_gt *gt = guc_to_gt(guc);
+   u8 engine_class = guc_class_to_engine_class(guc_class);
+
+   /* Class index is checked in class converter */
+   GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
+
+   return gt->engine_class[engine_class][instance];
+}
+
+int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
+const u32 *msg, u32 len)
+{
+   struct intel_engine_cs *engine;
+   u8 guc_class, instance;
+   u32 reason;
+
+   if (unlikely(len != 3)) {
+   drm_dbg(_to_gt(guc)->i915->drm, "Invalid length %u", len);
+   return -EPROTO;
+   }
+
+   guc_class = msg[0];
+   instance = msg[1];
+   reason = msg[2];
+
+   engine = guc_lookup_engine(guc, guc_class, instance);
+   if (unlikely(!engine)) {
+   drm_dbg(_to_gt(guc)->i915->drm,
+   "Invalid engine %d:%d", guc_class, instance);
+   return -EPROTO;
+   }
+
+   intel_gt_handle_error(guc_to_gt(guc), engine->mask,
+ I915_ERROR_CAPTURE,
+ "GuC failed to reset %s (reason=0x%08x)\n",
+ engine->name, reason);
+
+   return 0;
+}
+
 void intel_guc_submission_print_info(struct intel_guc *guc,
 struct drm_printer *p)
 {
-- 
2.28.0



[PATCH 41/51] drm/i915/guc: Add golden context to GuC ADS

2021-07-16 Thread Matthew Brost
From: John Harrison 

The media watchdog mechanism involves GuC doing a silent reset and
continuation of the hung context. This requires the i915 driver provide a
golden context to GuC in the ADS.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_gt.c |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c |   5 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 213 ++---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_uc.c  |   5 +
 drivers/gpu/drm/i915/gt/uc/intel_uc.h  |   1 +
 7 files changed, 199 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index acfdd53b2678..ceeb517ba259 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -654,6 +654,8 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_gt;
 
+   intel_uc_init_late(>uc);
+
err = i915_inject_probe_error(gt->i915, -EIO);
if (err)
goto err_gt;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 68266cbffd1f..979128e28372 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -180,6 +180,11 @@ void intel_guc_init_early(struct intel_guc *guc)
}
 }
 
+void intel_guc_init_late(struct intel_guc *guc)
+{
+   intel_guc_ads_init_late(guc);
+}
+
 static u32 guc_ctl_debug_flags(struct intel_guc *guc)
 {
u32 level = intel_guc_log_get_level(>log);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index bc71635c70b9..dc18ac510ac8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -60,6 +60,7 @@ struct intel_guc {
struct i915_vma *ads_vma;
struct __guc_ads_blob *ads_blob;
u32 ads_regset_size;
+   u32 ads_golden_ctxt_size;
 
struct i915_vma *lrc_desc_pool;
void *lrc_desc_pool_vaddr;
@@ -176,6 +177,7 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc 
*guc,
 }
 
 void intel_guc_init_early(struct intel_guc *guc);
+void intel_guc_init_late(struct intel_guc *guc);
 void intel_guc_init_send_regs(struct intel_guc *guc);
 void intel_guc_write_params(struct intel_guc *guc);
 int intel_guc_init(struct intel_guc *guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 93b0ac35a508..241b3089b658 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -7,6 +7,7 @@
 
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc.h"
+#include "gt/shmem_utils.h"
 #include "intel_guc_ads.h"
 #include "intel_guc_fwif.h"
 #include "intel_uc.h"
@@ -33,6 +34,10 @@
  *  +---+ <== dynamic
  *  | padding   |
  *  +---+ <== 4K aligned
+ *  | golden contexts   |
+ *  +---+
+ *  | padding   |
+ *  +---+ <== 4K aligned
  *  | private data  |
  *  +---+
  *  | padding   |
@@ -52,6 +57,11 @@ static u32 guc_ads_regset_size(struct intel_guc *guc)
return guc->ads_regset_size;
 }
 
+static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
+{
+   return PAGE_ALIGN(guc->ads_golden_ctxt_size);
+}
+
 static u32 guc_ads_private_data_size(struct intel_guc *guc)
 {
return PAGE_ALIGN(guc->fw.private_data_size);
@@ -62,12 +72,23 @@ static u32 guc_ads_regset_offset(struct intel_guc *guc)
return offsetof(struct __guc_ads_blob, regset);
 }
 
-static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
 {
u32 offset;
 
offset = guc_ads_regset_offset(guc) +
 guc_ads_regset_size(guc);
+
+   return PAGE_ALIGN(offset);
+}
+
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+   u32 offset;
+
+   offset = guc_ads_golden_ctxt_offset(guc) +
+guc_ads_golden_ctxt_size(guc);
+
return PAGE_ALIGN(offset);
 }
 
@@ -319,53 +340,163 @@ static void guc_mmio_reg_state_init(struct intel_guc 
*guc,
GEM_BUG_ON(temp_set.size);
 }
 
-/*
- * The first 80 dwords of the register state context, containing the
- * execlists and ppgtt registers.
- */
-#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
+static void fill_engine_enable_masks(struct intel_gt *gt,
+struct guc_gt_system_info *info)
+{
+   info->engine_enabled_masks[GUC_RENDER_CLASS] = 1;
+   

[PATCH 20/51] drm/i915: Track 'serial' counts for virtual engines

2021-07-16 Thread Matthew Brost
From: John Harrison 

The serial number tracking of engines happens at the backend of
request submission and was expecting to only be given physical
engines. However, in GuC submission mode, the decomposition of virtual
to physical engines does not happen in i915. Instead, requests are
submitted to their virtual engine mask all the way through to the
hardware (i.e. to GuC). This would mean that the heart beat code
thinks the physical engines are idle due to the serial number not
incrementing.

This patch updates the tracking to decompose virtual engines into
their physical constituents and tracks the request against each. This
is not entirely accurate as the GuC will only be issuing the request
to one physical engine. However, it is the best that i915 can do given
that it has no knowledge of the GuC's scheduling decisions.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 ++
 .../gpu/drm/i915/gt/intel_execlists_submission.c |  6 ++
 drivers/gpu/drm/i915/gt/intel_ring_submission.c  |  6 ++
 drivers/gpu/drm/i915/gt/mock_engine.c|  6 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c| 16 
 drivers/gpu/drm/i915/i915_request.c  |  4 +++-
 6 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 1cb9c3b70b29..8ad304b2f2e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -388,6 +388,8 @@ struct intel_engine_cs {
void(*park)(struct intel_engine_cs *engine);
void(*unpark)(struct intel_engine_cs *engine);
 
+   void(*bump_serial)(struct intel_engine_cs *engine);
+
void(*set_default_submission)(struct intel_engine_cs 
*engine);
 
const struct intel_context_ops *cops;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 28492cdce706..920707e22eb0 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3191,6 +3191,11 @@ static void execlists_release(struct intel_engine_cs 
*engine)
lrc_fini_wa_ctx(engine);
 }
 
+static void execlist_bump_serial(struct intel_engine_cs *engine)
+{
+   engine->serial++;
+}
+
 static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
@@ -3200,6 +3205,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
 
engine->cops = _context_ops;
engine->request_alloc = execlists_request_alloc;
+   engine->bump_serial = execlist_bump_serial;
 
engine->reset.prepare = execlists_reset_prepare;
engine->reset.rewind = execlists_reset_rewind;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c 
b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 5c4d204d07cc..61469c631057 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1047,6 +1047,11 @@ static void setup_irq(struct intel_engine_cs *engine)
}
 }
 
+static void ring_bump_serial(struct intel_engine_cs *engine)
+{
+   engine->serial++;
+}
+
 static void setup_common(struct intel_engine_cs *engine)
 {
struct drm_i915_private *i915 = engine->i915;
@@ -1066,6 +1071,7 @@ static void setup_common(struct intel_engine_cs *engine)
 
engine->cops = _context_ops;
engine->request_alloc = ring_request_alloc;
+   engine->bump_serial = ring_bump_serial;
 
/*
 * Using a global execution timeline; the previous final breadcrumb is
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c 
b/drivers/gpu/drm/i915/gt/mock_engine.c
index 68970398e4ef..9203c766db80 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -292,6 +292,11 @@ static void mock_engine_release(struct intel_engine_cs 
*engine)
intel_engine_fini_retire(engine);
 }
 
+static void mock_bump_serial(struct intel_engine_cs *engine)
+{
+   engine->serial++;
+}
+
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -318,6 +323,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private 
*i915,
 
engine->base.cops = _context_ops;
engine->base.request_alloc = mock_request_alloc;
+   engine->base.bump_serial = mock_bump_serial;
engine->base.emit_flush = mock_emit_flush;
engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 7b3e1c91e689..372e0dc7617a 100644
--- 

[PATCH 46/51] drm/i915/selftest: Fix MOCS selftest for GuC submission

2021-07-16 Thread Matthew Brost
From: Rahul Kumar Singh 

When GuC submission is enabled, the GuC controls engine resets. Rather
than explicitly triggering a reset, the driver must submit a hanging
context to GuC and wait for the reset to occur.

Signed-off-by: Rahul Kumar Singh 
Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
Cc: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 ++---
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c 
b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 8763bbeca0f7..b7314739ee40 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -10,6 +10,7 @@
 #include "gem/selftests/mock_context.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
+#include "selftests/intel_scheduler_helpers.h"
 
 struct live_mocs {
struct drm_i915_mocs_table table;
@@ -318,7 +319,8 @@ static int live_mocs_clean(void *arg)
 }
 
 static int active_engine_reset(struct intel_context *ce,
-  const char *reason)
+  const char *reason,
+  bool using_guc)
 {
struct igt_spinner spin;
struct i915_request *rq;
@@ -335,9 +337,13 @@ static int active_engine_reset(struct intel_context *ce,
}
 
err = request_add_spin(rq, );
-   if (err == 0)
+   if (err == 0 && !using_guc)
err = intel_engine_reset(ce->engine, reason);
 
+   /* Ensure the reset happens and kills the engine */
+   if (err == 0)
+   err = intel_selftest_wait_for_rq(rq);
+
igt_spinner_end();
igt_spinner_fini();
 
@@ -345,21 +351,23 @@ static int active_engine_reset(struct intel_context *ce,
 }
 
 static int __live_mocs_reset(struct live_mocs *mocs,
-struct intel_context *ce)
+struct intel_context *ce, bool using_guc)
 {
struct intel_gt *gt = ce->engine->gt;
int err;
 
if (intel_has_reset_engine(gt)) {
-   err = intel_engine_reset(ce->engine, "mocs");
-   if (err)
-   return err;
-
-   err = check_mocs_engine(mocs, ce);
-   if (err)
-   return err;
+   if (!using_guc) {
+   err = intel_engine_reset(ce->engine, "mocs");
+   if (err)
+   return err;
+
+   err = check_mocs_engine(mocs, ce);
+   if (err)
+   return err;
+   }
 
-   err = active_engine_reset(ce, "mocs");
+   err = active_engine_reset(ce, "mocs", using_guc);
if (err)
return err;
 
@@ -395,19 +403,32 @@ static int live_mocs_reset(void *arg)
 
igt_global_reset_lock(gt);
for_each_engine(engine, gt, id) {
+   bool using_guc = intel_engine_uses_guc(engine);
+   struct intel_selftest_saved_policy saved;
struct intel_context *ce;
+   int err2;
+
+   err = intel_selftest_modify_policy(engine, );
+   if (err)
+   break;
 
ce = mocs_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
-   break;
+   goto restore;
}
 
intel_engine_pm_get(engine);
-   err = __live_mocs_reset(, ce);
-   intel_engine_pm_put(engine);
 
+   err = __live_mocs_reset(, ce, using_guc);
+
+   intel_engine_pm_put(engine);
intel_context_put(ce);
+
+restore:
+   err2 = intel_selftest_restore_policy(engine, );
+   if (err == 0)
+   err = err2;
if (err)
break;
}
-- 
2.28.0



[PATCH 33/51] drm/i915/guc: Provide mmio list to be saved/restored on engine reset

2021-07-16 Thread Matthew Brost
From: John Harrison 

The driver must provide GuC with a list of mmio registers
that should be saved/restored during a GuC-based engine reset.
Unfortunately, the list must be dynamically allocated as its size is
variable. That means the driver must generate the list twice - once to
work out the size and a second time to actually save it.

v2:
 (Alan / CI)
  - GEN7_GT_MODE -> GEN6_GT_MODE to fix WA selftest failure

Signed-off-by: John Harrison 
Signed-off-by: Fernando Pacheco 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |  46 ++--
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 199 +-
 drivers/gpu/drm/i915/i915_reg.h   |   1 +
 5 files changed, 222 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 72562c233ad2..34738ccab8bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -150,13 +150,14 @@ static void _wa_add(struct i915_wa_list *wal, const 
struct i915_wa *wa)
 }
 
 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
-  u32 clear, u32 set, u32 read_mask)
+  u32 clear, u32 set, u32 read_mask, bool masked_reg)
 {
struct i915_wa wa = {
.reg  = reg,
.clr  = clear,
.set  = set,
.read = read_mask,
+   .masked_reg = masked_reg,
};
 
_wa_add(wal, );
@@ -165,7 +166,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
-   wa_add(wal, reg, clear, set, clear);
+   wa_add(wal, reg, clear, set, clear, false);
 }
 
 static void
@@ -200,20 +201,20 @@ wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, 
u32 clr)
 static void
 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-   wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
+   wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-   wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
+   wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
u32 mask, u32 val)
 {
-   wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
+   wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -583,10 +584,10 @@ static void icl_ctx_workarounds_init(struct 
intel_engine_cs *engine,
 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
 
/* WaEnableFloatBlendOptimization:icl */
-   wa_write_clr_set(wal,
-GEN10_CACHE_MODE_SS,
-0, /* write-only, so skip validation */
-_MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
+   wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+  _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+  0 /* write-only, so skip validation */,
+  true);
 
/* WaDisableGPGPUMidThreadPreemption:icl */
wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -631,7 +632,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs 
*engine,
   FF_MODE2,
   FF_MODE2_TDS_TIMER_MASK,
   FF_MODE2_TDS_TIMER_128,
-  0);
+  0, false);
 }
 
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -669,7 +670,7 @@ static void gen12_ctx_workarounds_init(struct 
intel_engine_cs *engine,
   FF_MODE2,
   FF_MODE2_GS_TIMER_MASK,
   FF_MODE2_GS_TIMER_224,
-  0);
+  0, false);
 
/*
 * Wa_14012131227:dg1
@@ -847,7 +848,7 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, 
struct i915_wa_list *wal)
wa_add(wal,
   HSW_ROW_CHICKEN3, 0,
   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
-   0 /* XXX does this reg exist? */);
+  0 /* XXX does this reg exist? */, true);
 
/* WaVSRefCountFullforceMissDisable:hsw */
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
@@ -1937,10 +1938,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
 * disable bit, which we don't touch here, but it's good
 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 */
-   wa_add(wal, GEN7_GT_MODE, 0,
-  _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
-   

[PATCH 29/51] drm/i915/guc: Suspend/resume implementation for new interface

2021-07-16 Thread Matthew Brost
The new GuC interface introduces an MMIO H2G command,
INTEL_GUC_ACTION_RESET_CLIENT, which is used to implement suspend. This
MMIO tears down any active contexts generating a context reset G2H CTB
for each. Once that step completes the GuC tears down the CTB
channels. It is safe to suspend once this MMIO H2G command completes
and all G2H CTBs have been processed. In practice the i915 will likely
never receive a G2H as suspend should only be called after the GPU is
idle.

Resume is implemented in the same manner as before - simply reload the
GuC firmware and reinitialize everything (e.g. CTB channels, contexts,
etc..).

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Signed-off-by: Michal Wajdeczko 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 64 ---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 15 +++--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  5 ++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 --
 5 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 57e18babdf4b..596cf4b818e5 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+   INTEL_GUC_ACTION_RESET_CLIENT = 0x5B01,
INTEL_GUC_ACTION_LIMIT
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 9b09395b998f..68266cbffd1f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -524,51 +524,34 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 
rsa_offset)
  */
 int intel_guc_suspend(struct intel_guc *guc)
 {
-   struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
int ret;
-   u32 status;
u32 action[] = {
-   INTEL_GUC_ACTION_ENTER_S_STATE,
-   GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
+   INTEL_GUC_ACTION_RESET_CLIENT,
};
 
-   /*
-* If GuC communication is enabled but submission is not supported,
-* we do not need to suspend the GuC.
-*/
-   if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
+   if (!intel_guc_is_ready(guc))
return 0;
 
-   /*
-* The ENTER_S_STATE action queues the save/restore operation in GuC FW
-* and then returns, so waiting on the H2G is not enough to guarantee
-* GuC is done. When all the processing is done, GuC writes
-* INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll
-* on that. Note that GuC does not ensure that the value in the register
-* is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is
-* in progress so we need to take care of that ourselves as well.
-*/
-
-   intel_uncore_write(uncore, SOFT_SCRATCH(14),
-  INTEL_GUC_SLEEP_STATE_INVALID_MASK);
-
-   ret = intel_guc_send(guc, action, ARRAY_SIZE(action));
-   if (ret)
-   return ret;
-
-   ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14),
-   INTEL_GUC_SLEEP_STATE_INVALID_MASK,
-   0, 0, 10, );
-   if (ret)
-   return ret;
-
-   if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) {
-   DRM_ERROR("GuC failed to change sleep state. "
- "action=0x%x, err=%u\n",
- action[0], status);
-   return -EIO;
+   if (intel_guc_submission_is_used(guc)) {
+   /*
+* This H2G MMIO command tears down the GuC in two steps. First 
it will
+* generate a G2H CTB for every active context indicating a 
reset. In
+* practice the i915 shouldn't ever get a G2H as suspend should 
only be
+* called when the GPU is idle. Next, it tears down the CTBs 
and this
+* H2G MMIO command completes.
+*
+* Don't abort on a failure code from the GuC. Keep going and 
do the
+* clean up in santize() and re-initialisation on resume and 
hopefully
+* the error here won't be problematic.
+*/
+   ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), 
NULL, 0);
+   if (ret)
+   DRM_ERROR("GuC suspend: RESET_CLIENT action failed with 
error %d!\n", ret);
}
 
+   /* Signal that the GuC isn't running. */
+   intel_guc_sanitize(guc);
+
return 0;
 }
 
@@ -578,7 

[PATCH 21/51] drm/i915: Hold reference to intel_context over life of i915_request

2021-07-16 Thread Matthew Brost
Hold a reference to the intel_context over life of an i915_request.
Without this an i915_request can exist after the context has been
destroyed (e.g. request retired, context closed, but user space holds a
reference to the request from an out fence). In the case of GuC
submission + virtual engine, the engine that the request references is
also destroyed which can trigger bad pointer deref in fence ops (e.g.
i915_fence_get_driver_name). We could likely change
i915_fence_get_driver_name to avoid touching the engine but let's just
be safe and hold the intel_context reference.

v2:
 (John Harrison)
  - Update comment explaining how GuC mode and execlists mode deal with
virtual engines differently

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/i915_request.c | 55 -
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 30ecdc46a12f..b3c792d55321 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -125,39 +125,17 @@ static void i915_fence_release(struct dma_fence *fence)
i915_sw_fence_fini(>semaphore);
 
/*
-* Keep one request on each engine for reserved use under mempressure
-*
-* We do not hold a reference to the engine here and so have to be
-* very careful in what rq->engine we poke. The virtual engine is
-* referenced via the rq->context and we released that ref during
-* i915_request_retire(), ergo we must not dereference a virtual
-* engine here. Not that we would want to, as the only consumer of
-* the reserved engine->request_pool is the power management parking,
-* which must-not-fail, and that is only run on the physical engines.
-*
-* Since the request must have been executed to be have completed,
-* we know that it will have been processed by the HW and will
-* not be unsubmitted again, so rq->engine and rq->execution_mask
-* at this point is stable. rq->execution_mask will be a single
-* bit if the last and _only_ engine it could execution on was a
-* physical engine, if it's multiple bits then it started on and
-* could still be on a virtual engine. Thus if the mask is not a
-* power-of-two we assume that rq->engine may still be a virtual
-* engine and so a dangling invalid pointer that we cannot dereference
-*
-* For example, consider the flow of a bonded request through a virtual
-* engine. The request is created with a wide engine mask (all engines
-* that we might execute on). On processing the bond, the request mask
-* is reduced to one or more engines. If the request is subsequently
-* bound to a single engine, it will then be constrained to only
-* execute on that engine and never returned to the virtual engine
-* after timeslicing away, see __unwind_incomplete_requests(). Thus we
-* know that if the rq->execution_mask is a single bit, rq->engine
-* can be a physical engine with the exact corresponding mask.
+* Keep one request on each engine for reserved use under mempressure,
+* do not use with virtual engines as this really is only needed for
+* kernel contexts.
 */
-   if (is_power_of_2(rq->execution_mask) &&
-   !cmpxchg(>engine->request_pool, NULL, rq))
+   if (!intel_engine_is_virtual(rq->engine) &&
+   !cmpxchg(>engine->request_pool, NULL, rq)) {
+   intel_context_put(rq->context);
return;
+   }
+
+   intel_context_put(rq->context);
 
kmem_cache_free(global.slab_requests, rq);
 }
@@ -954,7 +932,19 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
}
}
 
-   rq->context = ce;
+   /*
+* Hold a reference to the intel_context over life of an i915_request.
+* Without this an i915_request can exist after the context has been
+* destroyed (e.g. request retired, context closed, but user space holds
+* a reference to the request from an out fence). In the case of GuC
+* submission + virtual engine, the engine that the request references
+* is also destroyed which can trigger bad pointer dref in fence ops
+* (e.g. i915_fence_get_driver_name). We could likely change these
+* functions to avoid touching the engine but let's just be safe and
+* hold the intel_context reference. In execlist mode the request always
+* eventually points to a physical engine so this isn't an issue.
+*/
+   rq->context = intel_context_get(ce);
rq->engine = ce->engine;
rq->ring = ce->ring;
rq->execution_mask = ce->engine->mask;
@@ -1031,6 +1021,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)

[PATCH 39/51] drm/i915/guc: Connect reset modparam updates to GuC policy flags

2021-07-16 Thread Matthew Brost
From: John Harrison 

Changing the reset module parameter has no effect on a running GuC.
The corresponding entry in the ADS must be updated and then the GuC
informed via a Host2GuC message.

The new debugfs interface to module parameters allows this to happen.
However, connecting the parameter data address back to anything useful
is messy. One option would be to pass a new private data structure
address through instead of just the parameter pointer. However, that
means having a new (and different) data structure for each parameter
and a new (and different) write function for each parameter. This
method keeps everything generic by instead using a string lookup on
the directory entry name.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  2 +-
 drivers/gpu/drm/i915/i915_debugfs_params.c | 31 ++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 2ad5fcd4e1b7..c6d0b762d82c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -99,7 +99,7 @@ static int guc_action_policies_update(struct intel_guc *guc, 
u32 policy_offset)
policy_offset
};
 
-   return intel_guc_send(guc, action, ARRAY_SIZE(action));
+   return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, 
true);
 }
 
 int intel_guc_global_policies_update(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c 
b/drivers/gpu/drm/i915/i915_debugfs_params.c
index 4e2b077692cb..8ecd8b42f048 100644
--- a/drivers/gpu/drm/i915/i915_debugfs_params.c
+++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
@@ -6,9 +6,20 @@
 #include 
 
 #include "i915_debugfs_params.h"
+#include "gt/intel_gt.h"
+#include "gt/uc/intel_guc.h"
 #include "i915_drv.h"
 #include "i915_params.h"
 
+#define MATCH_DEBUGFS_NODE_NAME(_file, _name)  
(strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0)
+
+#define GET_I915(i915, name, ptr)  \
+   do {\
+   struct i915_params *params; \
+   params = container_of(((void *) (ptr)), typeof(*params), name); 
\
+   (i915) = container_of(params, typeof(*(i915)), params); \
+   } while(0)
+
 /* int param */
 static int i915_param_int_show(struct seq_file *m, void *data)
 {
@@ -24,6 +35,16 @@ static int i915_param_int_open(struct inode *inode, struct 
file *file)
return single_open(file, i915_param_int_show, inode->i_private);
 }
 
+static int notify_guc(struct drm_i915_private *i915)
+{
+   int ret = 0;
+
+   if (intel_uc_uses_guc_submission(>gt.uc))
+   ret = intel_guc_global_policies_update(>gt.uc.guc);
+
+   return ret;
+}
+
 static ssize_t i915_param_int_write(struct file *file,
const char __user *ubuf, size_t len,
loff_t *offp)
@@ -81,8 +102,10 @@ static ssize_t i915_param_uint_write(struct file *file,
 const char __user *ubuf, size_t len,
 loff_t *offp)
 {
+   struct drm_i915_private *i915;
struct seq_file *m = file->private_data;
unsigned int *value = m->private;
+   unsigned int old = *value;
int ret;
 
ret = kstrtouint_from_user(ubuf, len, 0, value);
@@ -95,6 +118,14 @@ static ssize_t i915_param_uint_write(struct file *file,
*value = b;
}
 
+   if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) {
+   GET_I915(i915, reset, value);
+
+   ret = notify_guc(i915);
+   if (ret)
+   *value = old;
+   }
+
return ret ?: len;
 }
 
-- 
2.28.0



[PATCH 25/51] drm/i915: Move active request tracking to a vfunc

2021-07-16 Thread Matthew Brost
Move active request tracking to a backend vfunc rather than assuming all
backends want to do this in the same manner. In the case of execlists /
ring submission the tracking is on the physical engine while with GuC
submission it is on the context.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |  3 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  7 
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +++
 .../drm/i915/gt/intel_execlists_submission.c  | 40 ++
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 22 ++
 drivers/gpu/drm/i915/gt/mock_engine.c | 30 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 33 +++
 drivers/gpu/drm/i915/i915_request.c   | 41 ++-
 drivers/gpu/drm/i915/i915_request.h   |  2 +
 9 files changed, 147 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 251ff7eea22d..bfb05d8697d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -393,6 +393,9 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
spin_lock_init(>guc_state.lock);
INIT_LIST_HEAD(>guc_state.fences);
 
+   spin_lock_init(>guc_active.lock);
+   INIT_LIST_HEAD(>guc_active.requests);
+
ce->guc_id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(>guc_id_link);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 542c98418771..035108c10b2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -162,6 +162,13 @@ struct intel_context {
struct list_head fences;
} guc_state;
 
+   struct {
+   /** lock: protects everything in guc_active */
+   spinlock_t lock;
+   /** requests: active requests on this context */
+   struct list_head requests;
+   } guc_active;
+
/* GuC scheduling state flags that do not require a lock. */
atomic_t guc_sched_state_no_lock;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 03a81e8d87f4..950fc73ed6af 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -420,6 +420,12 @@ struct intel_engine_cs {
 
void(*release)(struct intel_engine_cs *engine);
 
+   /*
+* Add / remove request from engine active tracking
+*/
+   void(*add_active_request)(struct i915_request *rq);
+   void(*remove_active_request)(struct i915_request *rq);
+
struct intel_engine_execlists execlists;
 
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index abe48421fd7a..f9b5f54a5abe 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3106,6 +3106,42 @@ static void execlists_park(struct intel_engine_cs 
*engine)
cancel_timer(>execlists.preempt);
 }
 
+static void add_to_engine(struct i915_request *rq)
+{
+   lockdep_assert_held(>engine->sched_engine->lock);
+   list_move_tail(>sched.link, >engine->sched_engine->requests);
+}
+
+static void remove_from_engine(struct i915_request *rq)
+{
+   struct intel_engine_cs *engine, *locked;
+
+   /*
+* Virtual engines complicate acquiring the engine timeline lock,
+* as their rq->engine pointer is not stable until under that
+* engine lock. The simple ploy we use is to take the lock then
+* check that the rq still belongs to the newly locked engine.
+*/
+   locked = READ_ONCE(rq->engine);
+   spin_lock_irq(>sched_engine->lock);
+   while (unlikely(locked != (engine = READ_ONCE(rq->engine {
+   spin_unlock(>sched_engine->lock);
+   spin_lock(>sched_engine->lock);
+   locked = engine;
+   }
+   list_del_init(>sched.link);
+
+   clear_bit(I915_FENCE_FLAG_PQUEUE, >fence.flags);
+   clear_bit(I915_FENCE_FLAG_HOLD, >fence.flags);
+
+   /* Prevent further __await_execution() registering a cb, then flush */
+   set_bit(I915_FENCE_FLAG_ACTIVE, >fence.flags);
+
+   spin_unlock_irq(>sched_engine->lock);
+
+   i915_request_notify_execute_cb_imm(rq);
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
if (GRAPHICS_VER(engine->i915) > 8)
@@ -3206,6 +3242,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
engine->cops = _context_ops;
engine->request_alloc = execlists_request_alloc;
engine->bump_serial = execlist_bump_serial;
+   engine->add_active_request = add_to_engine;
+   engine->remove_active_request = 

[PATCH 22/51] drm/i915/guc: Disable bonding extension with GuC submission

2021-07-16 Thread Matthew Brost
Update the bonding extension to return -ENODEV when using GuC submission
as this extension fundamentally will not work with the GuC submission
interface.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index edefe299bd76..28c62f7ccfc7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -491,6 +491,11 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
return -EINVAL;
}
 
+   if (intel_engine_uses_guc(master)) {
+   DRM_DEBUG("bonding extension not supported with GuC 
submission");
+   return -ENODEV;
+   }
+
if (get_user(num_bonds, >num_bonds))
return -EFAULT;
 
-- 
2.28.0



[PATCH 07/51] drm/i915/guc: Insert fence on context when deregistering

2021-07-16 Thread Matthew Brost
Sometimes during context pinning a context with the same guc_id is
registered with the GuC. In this a case deregister must be done before
the context can be registered. A fence is inserted on all requests while
the deregister is in flight. Once the G2H is received indicating the
deregistration is complete the context is registered and the fence is
released.

v2:
 (John H)
  - Fix commit message

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  5 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 51 ++-
 drivers/gpu/drm/i915/i915_request.h   |  8 +++
 4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 32fd6647154b..ad7197c5910f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -385,6 +385,7 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
mutex_init(>pin_mutex);
 
spin_lock_init(>guc_state.lock);
+   INIT_LIST_HEAD(>guc_state.fences);
 
ce->guc_id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(>guc_id_link);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 606c480aec26..e0e3a937f709 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -147,6 +147,11 @@ struct intel_context {
 * submission
 */
u8 sched_state;
+   /*
+* fences: maintains of list of requests that have a submit
+* fence related to GuC submission
+*/
+   struct list_head fences;
} guc_state;
 
/* GuC scheduling state flags that do not require a lock. */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a47b3813b4d0..a438aecfe93f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -926,6 +926,30 @@ static const struct intel_context_ops guc_context_ops = {
.destroy = guc_context_destroy,
 };
 
+static void __guc_signal_context_fence(struct intel_context *ce)
+{
+   struct i915_request *rq;
+
+   lockdep_assert_held(>guc_state.lock);
+
+   list_for_each_entry(rq, >guc_state.fences, guc_fence_link)
+   i915_sw_fence_complete(>submit);
+
+   INIT_LIST_HEAD(>guc_state.fences);
+}
+
+static void guc_signal_context_fence(struct intel_context *ce)
+{
+   unsigned long flags;
+
+   GEM_BUG_ON(!context_wait_for_deregister_to_register(ce));
+
+   spin_lock_irqsave(>guc_state.lock, flags);
+   clr_context_wait_for_deregister_to_register(ce);
+   __guc_signal_context_fence(ce);
+   spin_unlock_irqrestore(>guc_state.lock, flags);
+}
+
 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
 {
return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, >flags) ||
@@ -936,6 +960,7 @@ static int guc_request_alloc(struct i915_request *rq)
 {
struct intel_context *ce = rq->context;
struct intel_guc *guc = ce_to_guc(ce);
+   unsigned long flags;
int ret;
 
GEM_BUG_ON(!intel_context_is_pinned(rq->context));
@@ -980,7 +1005,7 @@ static int guc_request_alloc(struct i915_request *rq)
 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
 */
if (atomic_add_unless(>guc_id_ref, 1, 0))
-   return 0;
+   goto out;
 
ret = pin_guc_id(guc, ce);  /* returns 1 if new guc_id assigned */
if (unlikely(ret < 0))
@@ -996,6 +1021,28 @@ static int guc_request_alloc(struct i915_request *rq)
 
clear_bit(CONTEXT_LRCA_DIRTY, >flags);
 
+out:
+   /*
+* We block all requests on this context if a G2H is pending for a
+* context deregistration as the GuC will fail a context registration
+* while this G2H is pending. Once a G2H returns, the fence is released
+* that is blocking these requests (see guc_signal_context_fence).
+*
+* We can safely check the below field outside of the lock as it isn't
+* possible for this field to transition from being clear to set but
+* converse is possible, hence the need for the check within the lock.
+*/
+   if (likely(!context_wait_for_deregister_to_register(ce)))
+   return 0;
+
+   spin_lock_irqsave(>guc_state.lock, flags);
+   if (context_wait_for_deregister_to_register(ce)) {
+   i915_sw_fence_await(>submit);
+
+   list_add_tail(>guc_fence_link, >guc_state.fences);
+   }
+   spin_unlock_irqrestore(>guc_state.lock, flags);
+
return 0;
 }
 
@@ 

[PATCH 24/51] drm/i915: Add i915_sched_engine destroy vfunc

2021-07-16 Thread Matthew Brost
This helps the backends clean up when the schedule engine object gets
destroyed.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/i915_scheduler.c   | 3 ++-
 drivers/gpu/drm/i915/i915_scheduler.h   | 4 +---
 drivers/gpu/drm/i915/i915_scheduler_types.h | 5 +
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 3a58a9130309..4fceda96deed 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -431,7 +431,7 @@ void i915_request_show_with_schedule(struct drm_printer *m,
rcu_read_unlock();
 }
 
-void i915_sched_engine_free(struct kref *kref)
+static void default_destroy(struct kref *kref)
 {
struct i915_sched_engine *sched_engine =
container_of(kref, typeof(*sched_engine), ref);
@@ -453,6 +453,7 @@ i915_sched_engine_create(unsigned int subclass)
 
sched_engine->queue = RB_ROOT_CACHED;
sched_engine->queue_priority_hint = INT_MIN;
+   sched_engine->destroy = default_destroy;
 
INIT_LIST_HEAD(_engine->requests);
INIT_LIST_HEAD(_engine->hold);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index 650ab8e0db9f..3c9504e9f409 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -51,8 +51,6 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 struct i915_sched_engine *
 i915_sched_engine_create(unsigned int subclass);
 
-void i915_sched_engine_free(struct kref *kref);
-
 static inline struct i915_sched_engine *
 i915_sched_engine_get(struct i915_sched_engine *sched_engine)
 {
@@ -63,7 +61,7 @@ i915_sched_engine_get(struct i915_sched_engine *sched_engine)
 static inline void
 i915_sched_engine_put(struct i915_sched_engine *sched_engine)
 {
-   kref_put(_engine->ref, i915_sched_engine_free);
+   kref_put(_engine->ref, sched_engine->destroy);
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h 
b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 5935c3152bdc..00384e2c5273 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -163,6 +163,11 @@ struct i915_sched_engine {
 */
void *private_data;
 
+   /**
+* @destroy: destroy schedule engine / cleanup in backend
+*/
+   void(*destroy)(struct kref *kref);
+
/**
 * @kick_backend: kick backend after a request's priority has changed
 */
-- 
2.28.0



[PATCH 10/51] drm/i915/guc: Extend deregistration fence to schedule disable

2021-07-16 Thread Matthew Brost
Extend the deregistration context fence to fence when a GuC context has
scheduling disable pending.

v2:
 (John H)
  - Update comment why we check the pin count within spin lock

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 40 +++
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 5519c988a6ca..9dc1a256e185 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -922,7 +922,22 @@ static void guc_context_sched_disable(struct intel_context 
*ce)
goto unpin;
 
spin_lock_irqsave(>guc_state.lock, flags);
+
+   /*
+* We have to check if the context has been pinned again as another pin
+* operation is allowed to pass this function. Checking the pin count,
+* within ce->guc_state.lock, synchronizes this function with
+* guc_request_alloc ensuring a request doesn't slip through the
+* 'context_pending_disable' fence. Checking within the spin lock (can't
+* sleep) ensures another process doesn't pin this context and generate
+* a request before we set the 'context_pending_disable' flag here.
+*/
+   if (unlikely(atomic_add_unless(>pin_count, -2, 2))) {
+   spin_unlock_irqrestore(>guc_state.lock, flags);
+   return;
+   }
guc_id = prep_context_pending_disable(ce);
+
spin_unlock_irqrestore(>guc_state.lock, flags);
 
with_intel_runtime_pm(runtime_pm, wakeref)
@@ -1127,19 +1142,22 @@ static int guc_request_alloc(struct i915_request *rq)
 out:
/*
 * We block all requests on this context if a G2H is pending for a
-* context deregistration as the GuC will fail a context registration
-* while this G2H is pending. Once a G2H returns, the fence is released
-* that is blocking these requests (see guc_signal_context_fence).
+* schedule disable or context deregistration as the GuC will fail a
+* schedule enable or context registration if either G2H is pending
+* respectfully. Once a G2H returns, the fence is released that is
+* blocking these requests (see guc_signal_context_fence).
 *
-* We can safely check the below field outside of the lock as it isn't
-* possible for this field to transition from being clear to set but
+* We can safely check the below fields outside of the lock as it isn't
+* possible for these fields to transition from being clear to set but
 * converse is possible, hence the need for the check within the lock.
 */
-   if (likely(!context_wait_for_deregister_to_register(ce)))
+   if (likely(!context_wait_for_deregister_to_register(ce) &&
+  !context_pending_disable(ce)))
return 0;
 
spin_lock_irqsave(>guc_state.lock, flags);
-   if (context_wait_for_deregister_to_register(ce)) {
+   if (context_wait_for_deregister_to_register(ce) ||
+   context_pending_disable(ce)) {
i915_sw_fence_await(>submit);
 
list_add_tail(>guc_fence_link, >guc_state.fences);
@@ -1488,10 +1506,18 @@ int intel_guc_sched_done_process_msg(struct intel_guc 
*guc,
if (context_pending_enable(ce)) {
clr_context_pending_enable(ce);
} else if (context_pending_disable(ce)) {
+   /*
+* Unpin must be done before __guc_signal_context_fence,
+* otherwise a race exists between the requests getting
+* submitted + retired before this unpin completes resulting in
+* the pin_count going to zero and the context still being
+* enabled.
+*/
intel_context_sched_disable_unpin(ce);
 
spin_lock_irqsave(>guc_state.lock, flags);
clr_context_pending_disable(ce);
+   __guc_signal_context_fence(ce);
spin_unlock_irqrestore(>guc_state.lock, flags);
}
 
-- 
2.28.0



[PATCH 38/51] drm/i915/guc: Hook GuC scheduling policies up

2021-07-16 Thread Matthew Brost
From: John Harrison 

Use the official driver default scheduling policies for configuring
the GuC scheduler rather than a bunch of hardcoded values.

v2:
 (Matthew Brost)
  - Move I915_ENGINE_WANT_FORCED_PREEMPTION to later patch

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: Matthew Brost 
Cc: Jose Souza 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 44 ++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 ++--
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 62187c3dcda9..bc71635c70b9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -270,6 +270,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc 
*guc,
 
 void intel_guc_find_hung_context(struct intel_engine_cs *engine);
 
+int intel_guc_global_policies_update(struct intel_guc *guc);
+
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
 void intel_guc_submission_reset_finish(struct intel_guc *guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index d3e86ab7508f..2ad5fcd4e1b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -77,14 +77,54 @@ static u32 guc_ads_blob_size(struct intel_guc *guc)
   guc_ads_private_data_size(guc);
 }
 
-static void guc_policies_init(struct guc_policies *policies)
+static void guc_policies_init(struct intel_guc *guc, struct guc_policies 
*policies)
 {
+   struct intel_gt *gt = guc_to_gt(guc);
+   struct drm_i915_private *i915 = gt->i915;
+
policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;
+
policies->global_flags = 0;
+   if (i915->params.reset < 2)
+   policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
+
policies->is_valid = 1;
 }
 
+static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
+{
+   u32 action[] = {
+   INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
+   policy_offset
+   };
+
+   return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+int intel_guc_global_policies_update(struct intel_guc *guc)
+{
+   struct __guc_ads_blob *blob = guc->ads_blob;
+   struct intel_gt *gt = guc_to_gt(guc);
+   intel_wakeref_t wakeref;
+   int ret;
+
+   if (!blob)
+   return -ENOTSUPP;
+
+   GEM_BUG_ON(!blob->ads.scheduler_policies);
+
+   guc_policies_init(guc, >policies);
+
+   if (!intel_guc_is_ready(guc))
+   return 0;
+
+   with_intel_runtime_pm(>i915->runtime_pm, wakeref)
+   ret = guc_action_policies_update(guc, 
blob->ads.scheduler_policies);
+
+   return ret;
+}
+
 static void guc_mapping_table_init(struct intel_gt *gt,
   struct guc_gt_system_info *system_info)
 {
@@ -281,7 +321,7 @@ static void __guc_ads_init(struct intel_guc *guc)
u8 engine_class, guc_class;
 
/* GuC scheduling policies */
-   guc_policies_init(>policies);
+   guc_policies_init(guc, >policies);
 
/*
 * GuC expects a per-engine-class context image and size
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c8e1fc80f58e..6536bd6807a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -870,6 +870,7 @@ void intel_guc_submission_reset_finish(struct intel_guc 
*guc)
GEM_WARN_ON(atomic_read(>outstanding_submission_g2h));
atomic_set(>outstanding_submission_g2h, 0);
 
+   intel_guc_global_policies_update(guc);
enable_submission(guc);
intel_gt_unpark_heartbeats(guc_to_gt(guc));
 }
@@ -1160,8 +1161,9 @@ static void guc_context_policy_init(struct 
intel_engine_cs *engine,
 {
desc->policy_flags = 0;
 
-   desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
-   desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
+   /* NB: For both of these, zero means disabled. */
+   desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+   desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 }
 
 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
@@ -1937,13 +1939,13 @@ static void guc_default_vfuncs(struct intel_engine_cs 
*engine)
engine->set_default_submission = guc_set_default_submission;
 
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+   engine->flags |= I915_ENGINE_HAS_TIMESLICES;
 
/*
 * TODO: GuC supports timeslicing and semaphores 

[PATCH 48/51] drm/i915/selftest: Fix hangcheck self test for GuC submission

2021-07-16 Thread Matthew Brost
From: John Harrison 

When GuC submission is enabled, the GuC controls engine resets. Rather
than explicitly triggering a reset, the driver must submit a hanging
context to GuC and wait for the reset to occur.

Conversely, one of the tests specifically sends hanging batches to the
engines but wants them to sit around until a manual reset of the full
GT (including GuC itself). That means disabling GuC based engine
resets to prevent those from killing the hanging batch too soon. So,
add support to the scheduling policy helper for disabling resets as
well as making them quicker!

In GuC submission mode, the 'is engine idle' test basically turns into
'is engine PM wakelock held'. Independently, there is a heartbeat
disable helper function that the tests use. For unexplained reasons,
this acquires the engine wakelock before disabling the heartbeat and
only releases it when re-enabling the heartbeat. As one of the tests
tries to do a wait for idle in the middle of a heartbeat disabled
section, it is therefore guaranteed to always fail. Added a 'no_pm'
variant of the heartbeat helper that allows the engine to be asleep
while also having heartbeats disabled.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
Cc: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
 .../drm/i915/gt/selftest_engine_heartbeat.c   |  22 ++
 .../drm/i915/gt/selftest_engine_heartbeat.h   |   2 +
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 223 +-
 drivers/gpu/drm/i915/gt/selftest_mocs.c   |   3 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c|   6 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   3 +
 .../i915/selftests/intel_scheduler_helpers.c  |  39 ++-
 .../i915/selftests/intel_scheduler_helpers.h  |   9 +-
 9 files changed, 237 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d66b732a91c2..eec57e57403f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -449,6 +449,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_IS_VIRTUAL   BIT(5)
 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
unsigned int flags;
 
/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 4896e4ccad50..317eebf086c3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -405,3 +405,25 @@ void st_engine_heartbeat_enable(struct intel_engine_cs 
*engine)
engine->props.heartbeat_interval_ms =
engine->defaults.heartbeat_interval_ms;
 }
+
+void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
+{
+   engine->props.heartbeat_interval_ms = 0;
+
+   /*
+* Park the heartbeat but without holding the PM lock as that
+* makes the engines appear not-idle. Note that if/when unpark
+* is called due to the PM lock being acquired later the
+* heartbeat still won't be enabled because of the above = 0.
+*/
+   if (intel_engine_pm_get_if_awake(engine)) {
+   intel_engine_park_heartbeat(engine);
+   intel_engine_pm_put(engine);
+   }
+}
+
+void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
+{
+   engine->props.heartbeat_interval_ms =
+   engine->defaults.heartbeat_interval_ms;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h 
b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
index cd27113d5400..81da2cd8e406 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
@@ -9,6 +9,8 @@
 struct intel_engine_cs;
 
 void st_engine_heartbeat_disable(struct intel_engine_cs *engine);
+void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine);
 void st_engine_heartbeat_enable(struct intel_engine_cs *engine);
+void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine);
 
 #endif /* SELFTEST_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 0ed87cc4d063..971c0c249eb0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -17,6 +17,8 @@
 #include "selftests/igt_flush_test.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_atomic.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/intel_scheduler_helpers.h"
 
 #include "selftests/mock_drm.h"
 
@@ -449,6 +451,14 @@ static int igt_reset_nop_engine(void *arg)
IGT_TIMEOUT(end_time);
int err;
 
+   if (intel_engine_uses_guc(engine)) {
+   /* Engine level resets are triggered by 

[PATCH 42/51] drm/i915/guc: Implement banned contexts for GuC submission

2021-07-16 Thread Matthew Brost
When using GuC submission, if a context gets banned, disable scheduling
and mark all inflight requests as complete.

Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |  13 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
 drivers/gpu/drm/i915/gt/intel_reset.c |  32 +---
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  20 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 151 --
 drivers/gpu/drm/i915/i915_trace.h |  10 ++
 8 files changed, 195 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 28c62f7ccfc7..d87a4c6da5bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1084,7 +1084,7 @@ static void kill_engines(struct i915_gem_engines 
*engines, bool ban)
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
 
-   if (ban && intel_context_set_banned(ce))
+   if (ban && intel_context_ban(ce, NULL))
continue;
 
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 2ed9bf5f91a5..814d9277096a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -16,6 +16,7 @@
 #include "intel_engine_types.h"
 #include "intel_ring_types.h"
 #include "intel_timeline_types.h"
+#include "i915_trace.h"
 
 #define CE_TRACE(ce, fmt, ...) do {\
const struct intel_context *ce__ = (ce);\
@@ -243,6 +244,18 @@ static inline bool intel_context_set_banned(struct 
intel_context *ce)
return test_and_set_bit(CONTEXT_BANNED, >flags);
 }
 
+static inline bool intel_context_ban(struct intel_context *ce,
+struct i915_request *rq)
+{
+   bool ret = intel_context_set_banned(ce);
+
+   trace_intel_context_ban(ce);
+   if (ce->ops->ban)
+   ce->ops->ban(ce, rq);
+
+   return ret;
+}
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 035108c10b2c..57c19ee3e313 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -35,6 +35,8 @@ struct intel_context_ops {
 
int (*alloc)(struct intel_context *ce);
 
+   void (*ban)(struct intel_context *ce, struct i915_request *rq);
+
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, 
void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c
index f3cdbf4ba5c8..3ed694cab5af 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -22,7 +22,6 @@
 #include "intel_reset.h"
 
 #include "uc/intel_guc.h"
-#include "uc/intel_guc_submission.h"
 
 #define RESET_MAX_RETRIES 3
 
@@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, 
i915_reg_t reg, u32 clr)
intel_uncore_rmw_fw(uncore, reg, clr, 0);
 }
 
-static void skip_context(struct i915_request *rq)
-{
-   struct intel_context *hung_ctx = rq->context;
-
-   list_for_each_entry_from_rcu(rq, _ctx->timeline->requests, link) {
-   if (!i915_request_is_active(rq))
-   return;
-
-   if (rq->context == hung_ctx) {
-   i915_request_set_error_once(rq, -EIO);
-   __i915_request_skip(rq);
-   }
-   }
-}
-
 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
 {
struct drm_i915_file_private *file_priv = ctx->file_priv;
@@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq)
bool banned;
int i;
 
-   if (intel_context_is_closed(rq->context)) {
-   intel_context_set_banned(rq->context);
+   if (intel_context_is_closed(rq->context))
return true;
-   }
 
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
@@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq)
banned = !i915_gem_context_is_recoverable(ctx);
if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
banned = true;
-   if (banned) {
+   if (banned)
drm_dbg(>i915->drm, "context %s: guilty %d, banned\n",
ctx->name, atomic_read(>guilty_count));
-   intel_context_set_banned(rq->context);
-   }
 

[PATCH 09/51] drm/i915/guc: Disable engine barriers with GuC during unpin

2021-07-16 Thread Matthew Brost
Disable engine barriers for unpinning with GuC. This feature isn't
needed with the GuC as it disables context scheduling before unpinning
which guarantees the HW will not reference the context. Hence it is
not necessary to defer unpinning until a kernel context request
completes on each engine in the context engine mask.

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Signed-off-by: Daniele Ceraolo Spurio 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c|  2 +-
 drivers/gpu/drm/i915/gt/selftest_context.c | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 3d5b4116617f..91349d071e0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -80,7 +80,7 @@ static int intel_context_active_acquire(struct intel_context 
*ce)
 
__i915_active_acquire(>active);
 
-   if (intel_context_is_barrier(ce))
+   if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine))
return 0;
 
/* Preallocate tracking nodes */
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c 
b/drivers/gpu/drm/i915/gt/selftest_context.c
index 26685b927169..fa7b99a671dd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -209,7 +209,13 @@ static int __live_active_context(struct intel_engine_cs 
*engine)
 * This test makes sure that the context is kept alive until a
 * subsequent idle-barrier (emitted when the engine wakeref hits 0
 * with no more outstanding requests).
+*
+* In GuC submission mode we don't use idle barriers and we instead
+* get a message from the GuC to signal that it is safe to unpin the
+* context from memory.
 */
+   if (intel_engine_uses_guc(engine))
+   return 0;
 
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is awake before starting %s!\n",
@@ -357,7 +363,11 @@ static int __live_remote_context(struct intel_engine_cs 
*engine)
 * on the context image remotely (intel_context_prepare_remote_request),
 * which inserts foreign fences into intel_context.active, does not
 * clobber the idle-barrier.
+*
+* In GuC submission mode we don't use idle barriers.
 */
+   if (intel_engine_uses_guc(engine))
+   return 0;
 
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is awake before starting %s!\n",
-- 
2.28.0



[PATCH 28/51] drm/i915/guc: Add disable interrupts to guc sanitize

2021-07-16 Thread Matthew Brost
Add disable GuC interrupts to intel_guc_sanitize(). Part of this
requires moving the guc_*_interrupt wrapper function into header file
intel_guc.h.

Signed-off-by: Matthew Brost 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h | 16 
 drivers/gpu/drm/i915/gt/uc/intel_uc.c  | 21 +++--
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index d75a76882a44..b3cfc52fe0bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -217,9 +217,25 @@ static inline bool intel_guc_is_ready(struct intel_guc 
*guc)
return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(>ct);
 }
 
+static inline void intel_guc_reset_interrupts(struct intel_guc *guc)
+{
+   guc->interrupts.reset(guc);
+}
+
+static inline void intel_guc_enable_interrupts(struct intel_guc *guc)
+{
+   guc->interrupts.enable(guc);
+}
+
+static inline void intel_guc_disable_interrupts(struct intel_guc *guc)
+{
+   guc->interrupts.disable(guc);
+}
+
 static inline int intel_guc_sanitize(struct intel_guc *guc)
 {
intel_uc_fw_sanitize(>fw);
+   intel_guc_disable_interrupts(guc);
intel_guc_ct_sanitize(>ct);
guc->mmio_msg = 0;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index f0b02200aa01..ab11fe731ee7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -207,21 +207,6 @@ static void guc_handle_mmio_msg(struct intel_guc *guc)
spin_unlock_irq(>irq_lock);
 }
 
-static void guc_reset_interrupts(struct intel_guc *guc)
-{
-   guc->interrupts.reset(guc);
-}
-
-static void guc_enable_interrupts(struct intel_guc *guc)
-{
-   guc->interrupts.enable(guc);
-}
-
-static void guc_disable_interrupts(struct intel_guc *guc)
-{
-   guc->interrupts.disable(guc);
-}
-
 static int guc_enable_communication(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
@@ -242,7 +227,7 @@ static int guc_enable_communication(struct intel_guc *guc)
guc_get_mmio_msg(guc);
guc_handle_mmio_msg(guc);
 
-   guc_enable_interrupts(guc);
+   intel_guc_enable_interrupts(guc);
 
/* check for CT messages received before we enabled interrupts */
spin_lock_irq(>irq_lock);
@@ -265,7 +250,7 @@ static void guc_disable_communication(struct intel_guc *guc)
 */
guc_clear_mmio_msg(guc);
 
-   guc_disable_interrupts(guc);
+   intel_guc_disable_interrupts(guc);
 
intel_guc_ct_disable(>ct);
 
@@ -463,7 +448,7 @@ static int __uc_init_hw(struct intel_uc *uc)
if (ret)
goto err_out;
 
-   guc_reset_interrupts(guc);
+   intel_guc_reset_interrupts(guc);
 
/* WaEnableuKernelHeaderValidFix:skl */
/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
-- 
2.28.0



[PATCH 08/51] drm/i915/guc: Defer context unpin until scheduling is disabled

2021-07-16 Thread Matthew Brost
With GuC scheduling, it isn't safe to unpin a context while scheduling
is enabled for that context as the GuC may touch some of the pinned
state (e.g. LRC). To ensure scheduling isn't enabled when an unpin is
done, a callback is added to intel_context_unpin when pin count == 1
to disable scheduling for that context. When the response CTB is
received it is safe to do the final unpin.

Future patches may add a heuristic / delay to schedule the disable
callback to avoid thrashing on schedule enable / disable.

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |  27 +++-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 147 +-
 6 files changed, 181 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index ad7197c5910f..3d5b4116617f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -306,9 +306,9 @@ int __intel_context_do_pin(struct intel_context *ce)
return err;
 }
 
-void intel_context_unpin(struct intel_context *ce)
+void __intel_context_do_unpin(struct intel_context *ce, int sub)
 {
-   if (!atomic_dec_and_test(>pin_count))
+   if (!atomic_sub_and_test(sub, >pin_count))
return;
 
CE_TRACE(ce, "unpin\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b10cbe8fee99..974ef85320c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -113,7 +113,32 @@ static inline void __intel_context_pin(struct 
intel_context *ce)
atomic_inc(>pin_count);
 }
 
-void intel_context_unpin(struct intel_context *ce);
+void __intel_context_do_unpin(struct intel_context *ce, int sub);
+
+static inline void intel_context_sched_disable_unpin(struct intel_context *ce)
+{
+   __intel_context_do_unpin(ce, 2);
+}
+
+static inline void intel_context_unpin(struct intel_context *ce)
+{
+   if (!ce->ops->sched_disable) {
+   __intel_context_do_unpin(ce, 1);
+   } else {
+   /*
+* Move ownership of this pin to the scheduling disable which is
+* an async operation. When that operation completes the above
+* intel_context_sched_disable_unpin is called potentially
+* unpinning the context.
+*/
+   while (!atomic_add_unless(>pin_count, -1, 1)) {
+   if (atomic_cmpxchg(>pin_count, 1, 2) == 1) {
+   ce->ops->sched_disable(ce);
+   break;
+   }
+   }
+   }
+}
 
 void intel_context_enter_engine(struct intel_context *ce);
 void intel_context_exit_engine(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e0e3a937f709..4a5518d295c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -43,6 +43,8 @@ struct intel_context_ops {
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
 
+   void (*sched_disable)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 30773cd699f5..03b7222b04a2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -241,6 +241,8 @@ int intel_guc_reset_engine(struct intel_guc *guc,
 
 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
  const u32 *msg, u32 len);
+int intel_guc_sched_done_process_msg(struct intel_guc *guc,
+const u32 *msg, u32 len);
 
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 28ff82c5be45..019b25ff1888 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -932,6 +932,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
struct ct_incoming_msg *r
ret = intel_guc_deregister_done_process_msg(guc, payload,
len);
break;
+   case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+   ret = intel_guc_sched_done_process_msg(guc, payload, len);
+   break;
default:
ret = -EOPNOTSUPP;
  

[PATCH 02/51] drm/i915/guc: Remove GuC stage descriptor, add LRC descriptor

2021-07-16 Thread Matthew Brost
Remove old GuC stage descriptor, add LRC descriptor which will be used
by the new GuC interface implemented in this patch series.

v2:
 (John Harrison)
  - s/lrc/LRC/g

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   | 65 -
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 72 ++-
 3 files changed, 25 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 72e4653222e2..2625d2d5959f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -43,8 +43,8 @@ struct intel_guc {
struct i915_vma *ads_vma;
struct __guc_ads_blob *ads_blob;
 
-   struct i915_vma *stage_desc_pool;
-   void *stage_desc_pool_vaddr;
+   struct i915_vma *lrc_desc_pool;
+   void *lrc_desc_pool_vaddr;
 
/* Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 28245a217a39..4e4edc368b77 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -26,9 +26,6 @@
 #define GUC_CLIENT_PRIORITY_NORMAL 3
 #define GUC_CLIENT_PRIORITY_NUM4
 
-#define GUC_MAX_STAGE_DESCRIPTORS  1024
-#defineGUC_INVALID_STAGE_IDGUC_MAX_STAGE_DESCRIPTORS
-
 #define GUC_MAX_LRC_DESCRIPTORS65535
 #defineGUC_INVALID_LRC_ID  GUC_MAX_LRC_DESCRIPTORS
 
@@ -181,68 +178,6 @@ struct guc_process_desc {
u32 reserved[30];
 } __packed;
 
-/* engine id and context id is packed into guc_execlist_context.context_id*/
-#define GUC_ELC_CTXID_OFFSET   0
-#define GUC_ELC_ENGINE_OFFSET  29
-
-/* The execlist context including software and HW information */
-struct guc_execlist_context {
-   u32 context_desc;
-   u32 context_id;
-   u32 ring_status;
-   u32 ring_lrca;
-   u32 ring_begin;
-   u32 ring_end;
-   u32 ring_next_free_location;
-   u32 ring_current_tail_pointer_value;
-   u8 engine_state_submit_value;
-   u8 engine_state_wait_value;
-   u16 pagefault_count;
-   u16 engine_submit_queue_count;
-} __packed;
-
-/*
- * This structure describes a stage set arranged for a particular communication
- * between uKernel (GuC) and Driver (KMD). Technically, this is known as a
- * "GuC Context descriptor" in the specs, but we use the term "stage 
descriptor"
- * to avoid confusion with all the other things already named "context" in the
- * driver. A static pool of these descriptors are stored inside a GEM object
- * (stage_desc_pool) which is held for the entire lifetime of our interaction
- * with the GuC, being allocated before the GuC is loaded with its firmware.
- */
-struct guc_stage_desc {
-   u32 sched_common_area;
-   u32 stage_id;
-   u32 pas_id;
-   u8 engines_used;
-   u64 db_trigger_cpu;
-   u32 db_trigger_uk;
-   u64 db_trigger_phy;
-   u16 db_id;
-
-   struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
-
-   u8 attribute;
-
-   u32 priority;
-
-   u32 wq_sampled_tail_offset;
-   u32 wq_total_submit_enqueues;
-
-   u32 process_desc;
-   u32 wq_addr;
-   u32 wq_size;
-
-   u32 engine_presence;
-
-   u8 engine_suspended;
-
-   u8 reserved0[3];
-   u64 reserved1[1];
-
-   u64 desc_private;
-} __packed;
-
 #define CONTEXT_REGISTRATION_FLAG_KMD  BIT(0)
 
 #define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 100
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e9c237b18692..a366890fb840 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -65,57 +65,35 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
 }
 
-static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
+/* Future patches will use this function */
+__attribute__ ((unused))
+static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
 {
-   struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
+   struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
 
-   return [id];
-}
-
-static int guc_stage_desc_pool_create(struct intel_guc *guc)
-{
-   u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
- GUC_MAX_STAGE_DESCRIPTORS);
+   GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
 
-   return intel_guc_allocate_and_map_vma(guc, size, >stage_desc_pool,
- >stage_desc_pool_vaddr);
+   return [index];
 }
 
-static void 

[PATCH 26/51] drm/i915/guc: Reset implementation for new GuC interface

2021-07-16 Thread Matthew Brost
Reset implementation for new GuC interface. This is the legacy reset
implementation which is called when the i915 owns the engine hang check.
Future patches will offload the engine hang check to GuC but we will
continue to maintain this legacy path as a fallback and this code path
is also required if the GuC dies.

With the new GuC interface it is not possible to reset individual
engines - it is only possible to reset the GPU entirely. This patch
forces an entire chip reset if any engine hangs.

v2:
 (Michal)
  - Check for -EPIPE rather than -EIO (CT deadlock/corrupt check)
v3:
 (John H)
  - Split into a series of smaller patches

Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c |   6 +-
 drivers/gpu/drm/i915/gt/intel_reset.c |  18 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  13 -
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   8 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 562 ++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  39 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.h |   3 +
 7 files changed, 515 insertions(+), 134 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index aef3084e8b16..463a6ae605a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -174,8 +174,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (intel_gt_is_wedged(gt))
intel_gt_unset_wedged(gt);
 
-   intel_uc_sanitize(>uc);
-
for_each_engine(engine, gt, id)
if (engine->reset.prepare)
engine->reset.prepare(engine);
@@ -191,6 +189,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
__intel_engine_reset(engine, false);
}
 
+   intel_uc_reset(>uc, false);
+
for_each_engine(engine, gt, id)
if (engine->reset.finish)
engine->reset.finish(engine);
@@ -243,6 +243,8 @@ int intel_gt_resume(struct intel_gt *gt)
goto err_wedged;
}
 
+   intel_uc_reset_finish(>uc);
+
intel_rps_enable(>rps);
intel_llc_enable(>llc);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c
index 72251638d4ea..2987282dff6d 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -826,6 +826,8 @@ static int gt_reset(struct intel_gt *gt, 
intel_engine_mask_t stalled_mask)
__intel_engine_reset(engine, stalled_mask & engine->mask);
local_bh_enable();
 
+   intel_uc_reset(>uc, true);
+
intel_ggtt_restore_fences(gt->ggtt);
 
return err;
@@ -850,6 +852,8 @@ static void reset_finish(struct intel_gt *gt, 
intel_engine_mask_t awake)
if (awake & engine->mask)
intel_engine_pm_put(engine);
}
+
+   intel_uc_reset_finish(>uc);
 }
 
 static void nop_submit_request(struct i915_request *request)
@@ -903,6 +907,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
for_each_engine(engine, gt, id)
if (engine->reset.cancel)
engine->reset.cancel(engine);
+   intel_uc_cancel_requests(>uc);
local_bh_enable();
 
reset_finish(gt, awake);
@@ -1191,6 +1196,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs 
*engine, const char *msg)
ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >reset.flags));
 
+   if (intel_engine_uses_guc(engine))
+   return -ENODEV;
+
if (!intel_engine_pm_get_if_awake(engine))
return 0;
 
@@ -1201,13 +1209,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs 
*engine, const char *msg)
   "Resetting %s for %s\n", engine->name, msg);

atomic_inc(>i915->gpu_error.reset_engine_count[engine->uabi_class]);
 
-   if (intel_engine_uses_guc(engine))
-   ret = intel_guc_reset_engine(>gt->uc.guc, engine);
-   else
-   ret = intel_gt_reset_engine(engine);
+   ret = intel_gt_reset_engine(engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
-   ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
+   ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", 
engine->name, ret);
goto out;
}
 
@@ -1341,7 +1346,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
 * Try engine reset when available. We fall back to full reset if
 * single reset fails.
 */
-   if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
+   if (!intel_uc_uses_guc_submission(>uc) &&
+   intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();

[PATCH 12/51] drm/i915/guc: Ensure request ordering via completion fences

2021-07-16 Thread Matthew Brost
If two requests are on the same ring, they are explicitly ordered by the
HW. So, a submission fence is sufficient to ensure ordering when using
the new GuC submission interface. Conversely, if two requests share a
timeline and are on the same physical engine but different contexts,
this doesn't ensure ordering on the new GuC submission interface. So, a
completion fence needs to be used to ensure ordering.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  1 -
 drivers/gpu/drm/i915/i915_request.c   | 12 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9dc1a256e185..4443cc6f5320 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -933,7 +933,6 @@ static void guc_context_sched_disable(struct intel_context 
*ce)
 * a request before we set the 'context_pending_disable' flag here.
 */
if (unlikely(atomic_add_unless(>pin_count, -2, 2))) {
-   spin_unlock_irqrestore(>guc_state.lock, flags);
return;
}
guc_id = prep_context_pending_disable(ce);
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index b48c4905d3fc..2b2b63cba06c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -432,6 +432,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 
do {
tmp = list_first_entry(>requests, typeof(*tmp), link);
+   GEM_BUG_ON(!i915_request_completed(tmp));
} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -1380,6 +1381,9 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
return err;
 }
 
+static int
+i915_request_await_request(struct i915_request *to, struct i915_request *from);
+
 int
 i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence)
@@ -1465,7 +1469,8 @@ i915_request_await_request(struct i915_request *to, 
struct i915_request *from)
return ret;
}
 
-   if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+   if (!intel_engine_uses_guc(to->engine) &&
+   is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
ret = await_request_submit(to, from);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
@@ -1626,6 +1631,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = to_request(__i915_active_fence_set(>last_request,
  >fence));
if (prev && !__i915_request_is_complete(prev)) {
+   bool uses_guc = intel_engine_uses_guc(rq->engine);
+
/*
 * The requests are supposed to be kept in order. However,
 * we need to be wary in case the timeline->last_request
@@ -1636,7 +1643,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
   i915_seqno_passed(prev->fence.seqno,
 rq->fence.seqno));
 
-   if (is_power_of_2(READ_ONCE(prev->engine)->mask | 
rq->engine->mask))
+   if ((!uses_guc && is_power_of_2(READ_ONCE(prev->engine)->mask | 
rq->engine->mask)) ||
+   (uses_guc && prev->context == rq->context))
i915_sw_fence_await_sw_fence(>submit,
 >submit,
 >submitq);
-- 
2.28.0



[PATCH 30/51] drm/i915/guc: Handle context reset notification

2021-07-16 Thread Matthew Brost
GuC will issue a reset on detecting an engine hang and will notify
the driver via a G2H message. The driver will service the notification
by resetting the guilty context to a simple state or banning it
completely.

v2:
 (John Harrison)
  - Move msg[0] lookup after length check

Cc: Matthew Brost 
Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  3 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 36 +++
 drivers/gpu/drm/i915/i915_trace.h | 10 ++
 4 files changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index b3cfc52fe0bc..f23a3a618550 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -262,6 +262,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc 
*guc,
  const u32 *msg, u32 len);
 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
 const u32 *msg, u32 len);
+int intel_guc_context_reset_process_msg(struct intel_guc *guc,
+   const u32 *msg, u32 len);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 503a78517610..c4f9b44b9f86 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -981,6 +981,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
struct ct_incoming_msg *r
case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
ret = intel_guc_sched_done_process_msg(guc, payload, len);
break;
+   case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+   ret = intel_guc_context_reset_process_msg(guc, payload, len);
+   break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index fdb17279095c..feaf1ca61eaa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2196,6 +2196,42 @@ int intel_guc_sched_done_process_msg(struct intel_guc 
*guc,
return 0;
 }
 
+static void guc_context_replay(struct intel_context *ce)
+{
+   struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
+
+   __guc_reset_context(ce, true);
+   tasklet_hi_schedule(_engine->tasklet);
+}
+
+static void guc_handle_context_reset(struct intel_guc *guc,
+struct intel_context *ce)
+{
+   trace_intel_context_reset(ce);
+   guc_context_replay(ce);
+}
+
+int intel_guc_context_reset_process_msg(struct intel_guc *guc,
+   const u32 *msg, u32 len)
+{
+   struct intel_context *ce;
+   int desc_idx;
+
+   if (unlikely(len != 1)) {
+   drm_dbg(_to_gt(guc)->i915->drm, "Invalid length %u", len);
+   return -EPROTO;
+   }
+
+   desc_idx = msg[0];
+   ce = g2h_context_lookup(guc, desc_idx);
+   if (unlikely(!ce))
+   return -EPROTO;
+
+   guc_handle_context_reset(guc, ce);
+
+   return 0;
+}
+
 void intel_guc_submission_print_info(struct intel_guc *guc,
 struct drm_printer *p)
 {
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index 97c2e83984ed..c095c4d39456 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -929,6 +929,11 @@ DECLARE_EVENT_CLASS(intel_context,
  __entry->guc_sched_state_no_lock)
 );
 
+DEFINE_EVENT(intel_context, intel_context_reset,
+TP_PROTO(struct intel_context *ce),
+TP_ARGS(ce)
+);
+
 DEFINE_EVENT(intel_context, intel_context_register,
 TP_PROTO(struct intel_context *ce),
 TP_ARGS(ce)
@@ -1026,6 +1031,11 @@ trace_i915_request_out(struct i915_request *rq)
 {
 }
 
+static inline void
+trace_intel_context_reset(struct intel_context *ce)
+{
+}
+
 static inline void
 trace_intel_context_register(struct intel_context *ce)
 {
-- 
2.28.0



[PATCH 06/51] drm/i915/guc: Implement GuC context operations for new interface

2021-07-16 Thread Matthew Brost
Implement GuC context operations which includes GuC specific operations
alloc, pin, unpin, and destroy.

v2:
 (Daniel Vetter)
  - Use msleep_interruptible rather than cond_resched in busy loop
 (Michal)
  - Remove C++ style comment

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   5 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  22 +-
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h   |   1 -
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  40 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 666 --
 drivers/gpu/drm/i915/i915_reg.h   |   1 +
 drivers/gpu/drm/i915/i915_request.c   |   1 +
 8 files changed, 685 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index bd63813c8a80..32fd6647154b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
 
mutex_init(>pin_mutex);
 
+   spin_lock_init(>guc_state.lock);
+
+   ce->guc_id = GUC_INVALID_LRC_ID;
+   INIT_LIST_HEAD(>guc_id_link);
+
i915_active_init(>active,
 __intel_context_active, __intel_context_retire, 0);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 6d99631d19b9..606c480aec26 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -96,6 +96,7 @@ struct intel_context {
 #define CONTEXT_BANNED 6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION7
 #define CONTEXT_NOPREEMPT  8
+#define CONTEXT_LRCA_DIRTY 9
 
struct {
u64 timeout_us;
@@ -138,14 +139,29 @@ struct intel_context {
 
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
 
+   struct {
+   /** lock: protects everything in guc_state */
+   spinlock_t lock;
+   /**
+* sched_state: scheduling state of this context using GuC
+* submission
+*/
+   u8 sched_state;
+   } guc_state;
+
/* GuC scheduling state flags that do not require a lock. */
atomic_t guc_sched_state_no_lock;
 
+   /* GuC LRC descriptor ID */
+   u16 guc_id;
+
+   /* GuC LRC descriptor reference count */
+   atomic_t guc_id_ref;
+
/*
-* GuC LRC descriptor ID - Not assigned in this patch but future patches
-* in the series will.
+* GuC ID link - in list when unpinned but guc_id still valid in GuC
 */
-   u16 guc_id;
+   struct list_head guc_id_link;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h 
b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 41e5350a7a05..49d4857ad9b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -87,7 +87,6 @@
 #define GEN11_CSB_WRITE_PTR_MASK   (GEN11_CSB_PTR_MASK << 0)
 
 #define MAX_CONTEXT_HW_ID  (1 << 21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID  (1 << 20) /* exclusive */
 #define GEN11_MAX_CONTEXT_HW_ID(1 << 11) /* exclusive */
 /* in Gen12 ID 0x7FF is reserved to indicate idle */
 #define GEN12_MAX_CONTEXT_HW_ID(GEN11_MAX_CONTEXT_HW_ID - 1)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 8c7b92f699f1..30773cd699f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -7,6 +7,7 @@
 #define _INTEL_GUC_H_
 
 #include 
+#include 
 
 #include "intel_uncore.h"
 #include "intel_guc_fw.h"
@@ -44,6 +45,14 @@ struct intel_guc {
void (*disable)(struct intel_guc *guc);
} interrupts;
 
+   /*
+* contexts_lock protects the pool of free guc ids and a linked list of
+* guc ids available to be stolen
+*/
+   spinlock_t contexts_lock;
+   struct ida guc_ids;
+   struct list_head guc_id_list;
+
bool submission_selected;
 
struct i915_vma *ads_vma;
@@ -101,6 +110,34 @@ intel_guc_send_and_receive(struct intel_guc *guc, const 
u32 *action, u32 len,
 response_buf, response_buf_size, 0);
 }
 
+static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
+  const u32 *action,
+  u32 len,
+  bool loop)
+{
+   int err;
+   unsigned int sleep_period_ms = 1;
+   bool not_atomic = !in_atomic() && !irqs_disabled();
+
+   /* No sleeping with spin locks, just busy loop */
+   might_sleep_if(loop && not_atomic);
+
+retry:
+   err = 

[PATCH 05/51] drm/i915/guc: Add bypass tasklet submission path to GuC

2021-07-16 Thread Matthew Brost
Add bypass tasklet submission path to GuC. The tasklet is only used if the
H2G channel has backpressure.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 37 +++
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ca0717166a27..53b4a5eb4a85 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -172,6 +172,12 @@ static int guc_add_request(struct intel_guc *guc, struct 
i915_request *rq)
return err;
 }
 
+static inline void guc_set_lrc_tail(struct i915_request *rq)
+{
+   rq->context->lrc_reg_state[CTX_RING_TAIL] =
+   intel_ring_set_tail(rq->ring, rq->tail);
+}
+
 static inline int rq_prio(const struct i915_request *rq)
 {
return rq->sched.attr.priority;
@@ -215,8 +221,7 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
}
 done:
if (submit) {
-   last->context->lrc_reg_state[CTX_RING_TAIL] =
-   intel_ring_set_tail(last->ring, last->tail);
+   guc_set_lrc_tail(last);
 resubmit:
/*
 * We only check for -EBUSY here even though it is possible for
@@ -496,20 +501,36 @@ static inline void queue_request(struct i915_sched_engine 
*sched_engine,
set_bit(I915_FENCE_FLAG_PQUEUE, >fence.flags);
 }
 
+static int guc_bypass_tasklet_submit(struct intel_guc *guc,
+struct i915_request *rq)
+{
+   int ret;
+
+   __i915_request_submit(rq);
+
+   trace_i915_request_in(rq, 0);
+
+   guc_set_lrc_tail(rq);
+   ret = guc_add_request(guc, rq);
+   if (ret == -EBUSY)
+   guc->stalled_request = rq;
+
+   return ret;
+}
+
 static void guc_submit_request(struct i915_request *rq)
 {
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
+   struct intel_guc *guc = >engine->gt->uc.guc;
unsigned long flags;
 
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(_engine->lock, flags);
 
-   queue_request(sched_engine, rq, rq_prio(rq));
-
-   GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
-   GEM_BUG_ON(list_empty(>sched.link));
-
-   tasklet_hi_schedule(_engine->tasklet);
+   if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
+   queue_request(sched_engine, rq, rq_prio(rq));
+   else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
+   tasklet_hi_schedule(_engine->tasklet);
 
spin_unlock_irqrestore(_engine->lock, flags);
 }
-- 
2.28.0



[PATCH 23/51] drm/i915/guc: Direct all breadcrumbs for a class to single breadcrumbs

2021-07-16 Thread Matthew Brost
With GuC virtual engines the physical engine which a request executes
and completes on isn't known to the i915. Therefore we can't attach a
request to a physical engines breadcrumbs. To work around this we create
a single breadcrumbs per engine class when using GuC submission and
direct all physical engine interrupts to this breadcrumbs.

v2:
 (John H)
  - Rework header file structure so intel_engine_mask_t can be in
intel_engine_types.h

Signed-off-by: Matthew Brost 
CC: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 41 +---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.h   | 16 -
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  7 ++
 drivers/gpu/drm/i915/gt/intel_engine.h|  3 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 +++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 67 +--
 9 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 38cc42783dfb..2007dc6f6b99 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -15,28 +15,14 @@
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
-static bool irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_breadcrumbs *b)
 {
-   if (!engine->irq_enable)
-   return false;
-
-   /* Caller disables interrupts */
-   spin_lock(>gt->irq_lock);
-   engine->irq_enable(engine);
-   spin_unlock(>gt->irq_lock);
-
-   return true;
+   return intel_engine_irq_enable(b->irq_engine);
 }
 
-static void irq_disable(struct intel_engine_cs *engine)
+static void irq_disable(struct intel_breadcrumbs *b)
 {
-   if (!engine->irq_disable)
-   return;
-
-   /* Caller disables interrupts */
-   spin_lock(>gt->irq_lock);
-   engine->irq_disable(engine);
-   spin_unlock(>gt->irq_lock);
+   intel_engine_irq_disable(b->irq_engine);
 }
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
@@ -57,7 +43,7 @@ static void __intel_breadcrumbs_arm_irq(struct 
intel_breadcrumbs *b)
WRITE_ONCE(b->irq_armed, true);
 
/* Requests may have completed before we could enable the interrupt. */
-   if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+   if (!b->irq_enabled++ && b->irq_enable(b))
irq_work_queue(>irq_work);
 }
 
@@ -76,7 +62,7 @@ static void __intel_breadcrumbs_disarm_irq(struct 
intel_breadcrumbs *b)
 {
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
-   irq_disable(b->irq_engine);
+   b->irq_disable(b);
 
WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(b->irq_engine->gt);
@@ -281,7 +267,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
if (!b)
return NULL;
 
-   b->irq_engine = irq_engine;
+   kref_init(>ref);
 
spin_lock_init(>signalers_lock);
INIT_LIST_HEAD(>signalers);
@@ -290,6 +276,10 @@ intel_breadcrumbs_create(struct intel_engine_cs 
*irq_engine)
spin_lock_init(>irq_lock);
init_irq_work(>irq_work, signal_irq_work);
 
+   b->irq_engine = irq_engine;
+   b->irq_enable = irq_enable;
+   b->irq_disable = irq_disable;
+
return b;
 }
 
@@ -303,9 +293,9 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
spin_lock_irqsave(>irq_lock, flags);
 
if (b->irq_enabled)
-   irq_enable(b->irq_engine);
+   b->irq_enable(b);
else
-   irq_disable(b->irq_engine);
+   b->irq_disable(b);
 
spin_unlock_irqrestore(>irq_lock, flags);
 }
@@ -325,11 +315,14 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
}
 }
 
-void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
+void intel_breadcrumbs_free(struct kref *kref)
 {
+   struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);
+
irq_work_sync(>irq_work);
GEM_BUG_ON(!list_empty(>signalers));
GEM_BUG_ON(b->irq_armed);
+
kfree(b);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h 
b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
index 3ce5ce270b04..be0d4f379a85 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
@@ -9,7 +9,7 @@
 #include 
 #include 
 
-#include "intel_engine_types.h"
+#include "intel_breadcrumbs_types.h"
 
 struct drm_printer;
 struct i915_request;
@@ -17,7 +17,7 @@ struct intel_breadcrumbs;
 
 struct intel_breadcrumbs *
 intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
-void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
+void intel_breadcrumbs_free(struct kref *kref);
 
 void 

[PATCH 04/51] drm/i915/guc: Implement GuC submission tasklet

2021-07-16 Thread Matthew Brost
Implement GuC submission tasklet for new interface. The new GuC
interface uses H2G to submit contexts to the GuC. Since H2G uses a single
channel, a single tasklet is used for the submission path.

Also the per engine interrupt handler has been updated to disable the
rescheduling of the physical engine tasklet, when using GuC scheduling,
as the physical engine tasklet is no longer used.

In this patch the field, guc_id, has been added to intel_context and is
not assigned. Patches later in the series will assign this value.

v2:
 (John Harrison)
  - Clean up some comments

Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   9 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 231 +-
 3 files changed, 127 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 90026c177105..6d99631d19b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -137,6 +137,15 @@ struct intel_context {
struct intel_sseu sseu;
 
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
+
+   /* GuC scheduling state flags that do not require a lock. */
+   atomic_t guc_sched_state_no_lock;
+
+   /*
+* GuC LRC descriptor ID - Not assigned in this patch but future patches
+* in the series will.
+*/
+   u16 guc_id;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 35783558d261..8c7b92f699f1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -30,6 +30,10 @@ struct intel_guc {
struct intel_guc_log log;
struct intel_guc_ct ct;
 
+   /* Global engine used to submit requests to GuC */
+   struct i915_sched_engine *sched_engine;
+   struct i915_request *stalled_request;
+
/* intel_guc_recv interrupt related state */
spinlock_t irq_lock;
unsigned int msg_enabled_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a94a896a0b..ca0717166a27 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -60,6 +60,31 @@
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
+/*
+ * Below is a set of functions which control the GuC scheduling state which do
+ * not require a lock as all state transitions are mutually exclusive. i.e. It
+ * is not possible for the context pinning code and submission, for the same
+ * context, to be executing simultaneously. We still need an atomic as it is
+ * possible for some of the bits to change at the same time though.
+ */
+#define SCHED_STATE_NO_LOCK_ENABLEDBIT(0)
+static inline bool context_enabled(struct intel_context *ce)
+{
+   return (atomic_read(>guc_sched_state_no_lock) &
+   SCHED_STATE_NO_LOCK_ENABLED);
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+   atomic_or(SCHED_STATE_NO_LOCK_ENABLED, >guc_sched_state_no_lock);
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+   atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
+  >guc_sched_state_no_lock);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
return rb_entry(rb, struct i915_priolist, node);
@@ -122,37 +147,29 @@ static inline void set_lrc_desc_registered(struct 
intel_guc *guc, u32 id,
xa_store_irq(>context_lookup, id, ce, GFP_ATOMIC);
 }
 
-static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
-   /* Leaving stub as this function will be used in future patches */
-}
+   int err;
+   struct intel_context *ce = rq->context;
+   u32 action[3];
+   int len = 0;
+   bool enabled = context_enabled(ce);
 
-/*
- * When we're doing submissions using regular execlists backend, writing to
- * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
- * pinned in mappable aperture portion of GGTT are visible to command streamer.
- * Writes done by GuC on our behalf are not guaranteeing such ordering,
- * therefore, to ensure the flush, we're issuing a POSTING READ.
- */
-static void flush_ggtt_writes(struct i915_vma *vma)
-{
-   if (i915_vma_is_map_and_fenceable(vma))
-   intel_uncore_posting_read_fw(vma->vm->gt->uncore,
-GUC_STATUS);
-}
+   if (!enabled) {
+   action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+   action[len++] = ce->guc_id;
+   action[len++] = GUC_CONTEXT_ENABLE;
+   } else {
+

[PATCH 18/51] drm/i915: Add intel_context tracing

2021-07-16 Thread Matthew Brost
Add intel_context tracing. These trace points are particularly helpful
when debugging the GuC firmware and can be enabled via the
CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS kernel config option.

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   6 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  14 ++
 drivers/gpu/drm/i915/i915_trace.h | 144 ++
 3 files changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 91349d071e0e..251ff7eea22d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -8,6 +8,7 @@
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_trace.h"
 
 #include "intel_context.h"
 #include "intel_engine.h"
@@ -28,6 +29,7 @@ static void rcu_context_free(struct rcu_head *rcu)
 {
struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
 
+   trace_intel_context_free(ce);
kmem_cache_free(global.slab_ce, ce);
 }
 
@@ -46,6 +48,7 @@ intel_context_create(struct intel_engine_cs *engine)
return ERR_PTR(-ENOMEM);
 
intel_context_init(ce, engine);
+   trace_intel_context_create(ce);
return ce;
 }
 
@@ -268,6 +271,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
 
+   trace_intel_context_do_pin(ce);
+
 err_unlock:
mutex_unlock(>pin_mutex);
 err_post_unpin:
@@ -323,6 +328,7 @@ void __intel_context_do_unpin(struct intel_context *ce, int 
sub)
 */
intel_context_get(ce);
intel_context_active_release(ce);
+   trace_intel_context_do_unpin(ce);
intel_context_put(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 480fb2184ecf..05958260e849 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -343,6 +343,7 @@ static int guc_add_request(struct intel_guc *guc, struct 
i915_request *rq)
 
err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
if (!enabled && !err) {
+   trace_intel_context_sched_enable(ce);
atomic_inc(>outstanding_submission_g2h);
set_context_enabled(ce);
} else if (!enabled) {
@@ -808,6 +809,8 @@ static int register_context(struct intel_context *ce)
u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
ce->guc_id * sizeof(struct guc_lrc_desc);
 
+   trace_intel_context_register(ce);
+
return __guc_action_register_context(guc, ce->guc_id, offset);
 }
 
@@ -828,6 +831,8 @@ static int deregister_context(struct intel_context *ce, u32 
guc_id)
 {
struct intel_guc *guc = ce_to_guc(ce);
 
+   trace_intel_context_deregister(ce);
+
return __guc_action_deregister_context(guc, guc_id);
 }
 
@@ -902,6 +907,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce)
 * GuC before registering this context.
 */
if (context_registered) {
+   trace_intel_context_steal_guc_id(ce);
set_context_wait_for_deregister_to_register(ce);
intel_context_get(ce);
 
@@ -960,6 +966,7 @@ static void __guc_context_sched_disable(struct intel_guc 
*guc,
 
GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
 
+   trace_intel_context_sched_disable(ce);
intel_context_get(ce);
 
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
@@ -1121,6 +1128,9 @@ static void __guc_signal_context_fence(struct 
intel_context *ce)
 
lockdep_assert_held(>guc_state.lock);
 
+   if (!list_empty(>guc_state.fences))
+   trace_intel_context_fence_release(ce);
+
list_for_each_entry(rq, >guc_state.fences, guc_fence_link)
i915_sw_fence_complete(>submit);
 
@@ -1531,6 +1541,8 @@ int intel_guc_deregister_done_process_msg(struct 
intel_guc *guc,
if (unlikely(!ce))
return -EPROTO;
 
+   trace_intel_context_deregister_done(ce);
+
if (context_wait_for_deregister_to_register(ce)) {
struct intel_runtime_pm *runtime_pm =
>engine->gt->i915->runtime_pm;
@@ -1582,6 +1594,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc 
*guc,
return -EPROTO;
}
 
+   trace_intel_context_sched_done(ce);
+
if (context_pending_enable(ce)) {
clr_context_pending_enable(ce);
} else if (context_pending_disable(ce)) {
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index ea41d069bf7d..97c2e83984ed 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -905,6 +905,90 @@ TRACE_EVENT(i915_request_out,
  __entry->ctx, 

[PATCH 16/51] drm/i915/guc: Update GuC debugfs to support new GuC

2021-07-16 Thread Matthew Brost
Update GuC debugfs to support the new GuC structures.

v2:
 (John Harrison)
  - Remove intel_lrc_reg.h include from i915_debugfs.c
 (Michal)
  - Rename GuC debugfs functions

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 22 
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  3 +
 .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 23 +++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 55 +++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  5 ++
 5 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f1cbed6b9f0a..503a78517610 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -1171,3 +1171,25 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 
ct_try_receive_message(ct);
 }
+
+void intel_guc_ct_print_info(struct intel_guc_ct *ct,
+struct drm_printer *p)
+{
+   drm_printf(p, "CT %s\n", enableddisabled(ct->enabled));
+
+   if (!ct->enabled)
+   return;
+
+   drm_printf(p, "H2G Space: %u\n",
+  atomic_read(>ctbs.send.space) * 4);
+   drm_printf(p, "Head: %u\n",
+  ct->ctbs.send.desc->head);
+   drm_printf(p, "Tail: %u\n",
+  ct->ctbs.send.desc->tail);
+   drm_printf(p, "G2H Space: %u\n",
+  atomic_read(>ctbs.recv.space) * 4);
+   drm_printf(p, "Head: %u\n",
+  ct->ctbs.recv.desc->head);
+   drm_printf(p, "Tail: %u\n",
+  ct->ctbs.recv.desc->tail);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 4b30a562ae63..7b34026d264a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -16,6 +16,7 @@
 
 struct i915_vma;
 struct intel_guc;
+struct drm_printer;
 
 /**
  * DOC: Command Transport (CT).
@@ -112,4 +113,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 
*action, u32 len,
  u32 *response_buf, u32 response_buf_size, u32 flags);
 void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
 
+void intel_guc_ct_print_info(struct intel_guc_ct *ct, struct drm_printer *p);
+
 #endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index fe7cb7b29a1e..7a454c91a736 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -9,6 +9,8 @@
 #include "intel_guc.h"
 #include "intel_guc_debugfs.h"
 #include "intel_guc_log_debugfs.h"
+#include "gt/uc/intel_guc_ct.h"
+#include "gt/uc/intel_guc_submission.h"
 
 static int guc_info_show(struct seq_file *m, void *data)
 {
@@ -22,16 +24,35 @@ static int guc_info_show(struct seq_file *m, void *data)
drm_puts(, "\n");
intel_guc_log_info(>log, );
 
-   /* Add more as required ... */
+   if (!intel_guc_submission_is_used(guc))
+   return 0;
+
+   intel_guc_ct_print_info(>ct, );
+   intel_guc_submission_print_info(guc, );
 
return 0;
 }
 DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
 
+static int guc_registered_contexts_show(struct seq_file *m, void *data)
+{
+   struct intel_guc *guc = m->private;
+   struct drm_printer p = drm_seq_file_printer(m);
+
+   if (!intel_guc_submission_is_used(guc))
+   return -ENODEV;
+
+   intel_guc_submission_print_context_info(guc, );
+
+   return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
+
 void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
 {
static const struct debugfs_gt_file files[] = {
{ "guc_info", _info_fops, NULL },
+   { "guc_registered_contexts", _registered_contexts_fops, 
NULL },
};
 
if (!intel_guc_is_supported(guc))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 088d11e2e497..a2af7e17dcc2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1602,3 +1602,58 @@ int intel_guc_sched_done_process_msg(struct intel_guc 
*guc,
 
return 0;
 }
+
+void intel_guc_submission_print_info(struct intel_guc *guc,
+struct drm_printer *p)
+{
+   struct i915_sched_engine *sched_engine = guc->sched_engine;
+   struct rb_node *rb;
+   unsigned long flags;
+
+   if (!sched_engine)
+   return;
+
+   drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
+  atomic_read(>outstanding_submission_g2h));
+   drm_printf(p, "GuC tasklet count: %u\n\n",
+  atomic_read(_engine->tasklet.count));
+
+   spin_lock_irqsave(_engine->lock, 

[PATCH 15/51] drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC

2021-07-16 Thread Matthew Brost
When running the GuC the GPU can't be considered idle if the GuC still
has contexts pinned. As such, a call has been added in
intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for
the number of unpinned contexts to go to zero.

v2: rtimeout -> remaining_timeout
v3: Drop unnecessary includes, guc_submission_busy_loop ->
guc_submission_send_busy_loop, drop negative timeout trick, move a
refactor of guc_context_unpin to earlier patch (John H)

Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gt/intel_gt.c| 19 +
 drivers/gpu/drm/i915/gt/intel_gt.h|  2 +
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 21 ++---
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  7 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 85 +--
 drivers/gpu/drm/i915/gt/uc/intel_uc.h |  5 ++
 drivers/gpu/drm/i915/i915_gem_evict.c |  1 +
 .../gpu/drm/i915/selftests/igt_live_test.c|  2 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
 13 files changed, 129 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a90f796e85c0..6fffd4d377c2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -645,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto insert;
 
/* Attempt to reap some mmap space from dead objects */
-   err = intel_gt_retire_requests_timeout(>gt, MAX_SCHEDULE_TIMEOUT);
+   err = intel_gt_retire_requests_timeout(>gt, MAX_SCHEDULE_TIMEOUT,
+  NULL);
if (err)
goto err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index e714e21c0a4d..acfdd53b2678 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -585,6 +585,25 @@ static void __intel_gt_disable(struct intel_gt *gt)
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
 }
 
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+   long remaining_timeout;
+
+   /* If the device is asleep, we have no requests outstanding */
+   if (!intel_gt_pm_is_awake(gt))
+   return 0;
+
+   while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
+  _timeout)) 
> 0) {
+   cond_resched();
+   if (signal_pending(current))
+   return -EINTR;
+   }
+
+   return timeout ? timeout : intel_uc_wait_for_idle(>uc,
+ remaining_timeout);
+}
+
 int intel_gt_init(struct intel_gt *gt)
 {
int err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index e7aabe0cc5bf..74e771871a9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt);
 
 void intel_gt_driver_late_release(struct intel_gt *gt);
 
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
 void intel_gt_check_and_clear_faults(struct intel_gt *gt);
 void intel_gt_clear_error_registers(struct intel_gt *gt,
intel_engine_mask_t engine_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c 
b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 647eca9d867a..edb881d75630 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -130,7 +130,8 @@ void intel_engine_fini_retire(struct intel_engine_cs 
*engine)
GEM_BUG_ON(engine->retire);
 }
 
-long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout,
+ long *remaining_timeout)
 {
struct intel_gt_timelines *timelines = >timelines;
struct intel_timeline *tl, *tn;
@@ -195,22 +196,10 @@ out_active:   spin_lock(>lock);
if (flush_submission(gt, timeout)) /* Wait, there's more! */
active_count++;
 
-   return active_count ? timeout : 0;
-}
-
-int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
-{
-   /* If the device is asleep, we have no requests outstanding */
-   if (!intel_gt_pm_is_awake(gt))
-   return 0;
-
-   while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
-   cond_resched();
-   if (signal_pending(current))
-   return -EINTR;
-   }
+   if (remaining_timeout)
+   *remaining_timeout = timeout;
 
-   return timeout;

[PATCH 11/51] drm/i915: Disable preempt busywait when using GuC scheduling

2021-07-16 Thread Matthew Brost
Disable preempt busywait when using GuC scheduling. This isn't needed as
the GuC controls preemption when scheduling.

v2:
 (John H):
  - Fix commit message

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 87b06572fd2e..f7aae502ec3d 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -506,7 +506,8 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 
*cs)
*cs++ = MI_USER_INTERRUPT;
 
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-   if (intel_engine_has_semaphores(rq->engine))
+   if (intel_engine_has_semaphores(rq->engine) &&
+   !intel_uc_uses_guc_submission(>engine->gt->uc))
cs = emit_preempt_busywait(rq, cs);
 
rq->tail = intel_ring_offset(rq, cs);
@@ -598,7 +599,8 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, 
u32 *cs)
*cs++ = MI_USER_INTERRUPT;
 
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-   if (intel_engine_has_semaphores(rq->engine))
+   if (intel_engine_has_semaphores(rq->engine) &&
+   !intel_uc_uses_guc_submission(>engine->gt->uc))
cs = gen12_emit_preempt_busywait(rq, cs);
 
rq->tail = intel_ring_offset(rq, cs);
-- 
2.28.0



[PATCH 13/51] drm/i915/guc: Disable semaphores when using GuC scheduling

2021-07-16 Thread Matthew Brost
Semaphores are an optimization and not required for basic GuC submission
to work properly. Disable until we have time to do the implementation to
enable semaphores and tune them for performance. Also, the long term
direction is to delete semaphores from the i915 entirely, so that is
another reason not to enable these for GuC submission.

This patch fixes an existing bug where I915_ENGINE_HAS_SEMAPHORES was
not honored correctly.

v2: Reword commit message
v3:
 (John H)
  - Add text to commit indicating this also fixing an existing bug

Cc: John Harrison 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7d6f52d8a801..64659802d4df 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -799,7 +799,8 @@ static int intel_context_set_gem(struct intel_context *ce,
}
 
if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
-   intel_engine_has_timeslices(ce->engine))
+   intel_engine_has_timeslices(ce->engine) &&
+   intel_engine_has_semaphores(ce->engine))
__set_bit(CONTEXT_USE_SEMAPHORES, >flags);
 
if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
@@ -1778,7 +1779,8 @@ static void __apply_priority(struct intel_context *ce, 
void *arg)
if (!intel_engine_has_timeslices(ce->engine))
return;
 
-   if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
+   if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+   intel_engine_has_semaphores(ce->engine))
intel_context_set_use_semaphores(ce);
else
intel_context_clear_use_semaphores(ce);
-- 
2.28.0



[PATCH 01/51] drm/i915/guc: Add new GuC interface defines and structures

2021-07-16 Thread Matthew Brost
Add new GuC interface defines and structures while maintaining old ones
in parallel.

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  | 14 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   | 41 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 2d6198e63ebe..57e18babdf4b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -124,10 +124,24 @@ enum intel_guc_action {
INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
+   INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+   INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+   INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+   INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+   INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+   INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+   INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+   INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+   INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+   INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+   INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+   INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+   INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+   INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_LIMIT
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 617ec601648d..28245a217a39 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -17,6 +17,9 @@
 #include "abi/guc_communication_ctb_abi.h"
 #include "abi/guc_messages_abi.h"
 
+#define GUC_CONTEXT_DISABLE0
+#define GUC_CONTEXT_ENABLE 1
+
 #define GUC_CLIENT_PRIORITY_KMD_HIGH   0
 #define GUC_CLIENT_PRIORITY_HIGH   1
 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
@@ -26,6 +29,9 @@
 #define GUC_MAX_STAGE_DESCRIPTORS  1024
 #defineGUC_INVALID_STAGE_IDGUC_MAX_STAGE_DESCRIPTORS
 
+#define GUC_MAX_LRC_DESCRIPTORS65535
+#defineGUC_INVALID_LRC_ID  GUC_MAX_LRC_DESCRIPTORS
+
 #define GUC_RENDER_ENGINE  0
 #define GUC_VIDEO_ENGINE   1
 #define GUC_BLITTER_ENGINE 2
@@ -237,6 +243,41 @@ struct guc_stage_desc {
u64 desc_private;
 } __packed;
 
+#define CONTEXT_REGISTRATION_FLAG_KMD  BIT(0)
+
+#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 100
+#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 50
+
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLEBIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
+ * is not required.
+ */
+struct guc_lrc_desc {
+   u32 hw_context_desc;
+   u32 slpm_perf_mode_hint;/* SPLC v1 only */
+   u32 slpm_freq_hint;
+   u32 engine_submit_mask; /* In logical space */
+   u8 engine_class;
+   u8 reserved0[3];
+   u32 priority;
+   u32 process_desc;
+   u32 wq_addr;
+   u32 wq_size;
+   u32 context_flags;  /* CONTEXT_REGISTRATION_* */
+   /* Time for one workload to execute. (in micro seconds) */
+   u32 execution_quantum;
+   /* Time to wait for a preemption request to complete before issuing a
+* reset. (in micro seconds). */
+   u32 preemption_timeout;
+   u32 policy_flags;   /* CONTEXT_POLICY_* */
+   u32 reserved1[19];
+} __packed;
+
 #define GUC_POWER_UNSPECIFIED  0
 #define GUC_POWER_D0   1
 #define GUC_POWER_D1   2
-- 
2.28.0



[PATCH 03/51] drm/i915/guc: Add LRC descriptor context lookup array

2021-07-16 Thread Matthew Brost
Add LRC descriptor context lookup array which can resolve the
intel_context from the LRC descriptor index. In addition to lookup, it
can determine if the LRC descriptor context is currently registered with
the GuC by checking if an entry for a descriptor index is present.
Future patches in the series will make use of this array.

v2:
 (Michal)
  - "linux/xarray.h" -> 
  - s/lrc/LRC
 (John H)
  - Fix commit message

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  5 +++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 32 +--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2625d2d5959f..35783558d261 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -6,6 +6,8 @@
 #ifndef _INTEL_GUC_H_
 #define _INTEL_GUC_H_
 
+#include 
+
 #include "intel_uncore.h"
 #include "intel_guc_fw.h"
 #include "intel_guc_fwif.h"
@@ -46,6 +48,9 @@ struct intel_guc {
struct i915_vma *lrc_desc_pool;
void *lrc_desc_pool_vaddr;
 
+   /* guc_id to intel_context lookup */
+   struct xarray context_lookup;
+
/* Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a366890fb840..23a94a896a0b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -65,8 +65,6 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
 }
 
-/* Future patches will use this function */
-__attribute__ ((unused))
 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
 {
struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
@@ -76,6 +74,15 @@ static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc 
*guc, u32 index)
return [index];
 }
 
+static inline struct intel_context *__get_context(struct intel_guc *guc, u32 
id)
+{
+   struct intel_context *ce = xa_load(>context_lookup, id);
+
+   GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
+
+   return ce;
+}
+
 static int guc_lrc_desc_pool_create(struct intel_guc *guc)
 {
u32 size;
@@ -96,6 +103,25 @@ static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
i915_vma_unpin_and_release(>lrc_desc_pool, I915_VMA_RELEASE_MAP);
 }
 
+static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
+{
+   struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
+
+   memset(desc, 0, sizeof(*desc));
+   xa_erase_irq(>context_lookup, id);
+}
+
+static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
+{
+   return __get_context(guc, id);
+}
+
+static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
+  struct intel_context *ce)
+{
+   xa_store_irq(>context_lookup, id, ce, GFP_ATOMIC);
+}
+
 static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
/* Leaving stub as this function will be used in future patches */
@@ -400,6 +426,8 @@ int intel_guc_submission_init(struct intel_guc *guc)
 */
GEM_BUG_ON(!guc->lrc_desc_pool);
 
+   xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
+
return 0;
 }
 
-- 
2.28.0



[PATCH 00/51] GuC submission support

2021-07-16 Thread Matthew Brost
As discussed in [1], [2] we are enabling GuC submission support in the
i915. This is a subset of the patches in step 5 described in [1],
basically it is what is needed to enable CI with GuC submission on gen11+
platforms.

This series itself will likely be broken down into smaller patch sets to
merge. Likely into CTBs changes, basic submission, virtual engines, and
resets.

A following series will address the missing patches remaining from [1].

Locally tested on TGL machine and basic tests seem to be passing.

v2: Address all review comments in [3], include several more patches to
make CI [4] happy.

Signed-off-by: Matthew Brost 

[1] https://patchwork.freedesktop.org/series/89844/
[2] https://patchwork.freedesktop.org/series/91417/
[3] https://patchwork.freedesktop.org/series/91840/
[4] https://patchwork.freedesktop.org/series/91885/

Signed-off-by: Matthew Brost 

Daniele Ceraolo Spurio (1):
  drm/i915/guc: Unblock GuC submission on Gen11+

John Harrison (12):
  drm/i915: Track 'serial' counts for virtual engines
  drm/i915/guc: Provide mmio list to be saved/restored on engine reset
  drm/i915/guc: Don't complain about reset races
  drm/i915/guc: Enable GuC engine reset
  drm/i915/guc: Fix for error capture after full GPU reset with GuC
  drm/i915/guc: Hook GuC scheduling policies up
  drm/i915/guc: Connect reset modparam updates to GuC policy flags
  drm/i915/guc: Include scheduling policies in the debugfs state dump
  drm/i915/guc: Add golden context to GuC ADS
  drm/i915/selftest: Better error reporting from hangcheck selftest
  drm/i915/selftest: Fix hangcheck self test for GuC submission
  drm/i915/selftest: Bump selftest timeouts for hangcheck

Matthew Brost (36):
  drm/i915/guc: Add new GuC interface defines and structures
  drm/i915/guc: Remove GuC stage descriptor, add LRC descriptor
  drm/i915/guc: Add LRC descriptor context lookup array
  drm/i915/guc: Implement GuC submission tasklet
  drm/i915/guc: Add bypass tasklet submission path to GuC
  drm/i915/guc: Implement GuC context operations for new inteface
  drm/i915/guc: Insert fence on context when deregistering
  drm/i915/guc: Defer context unpin until scheduling is disabled
  drm/i915/guc: Disable engine barriers with GuC during unpin
  drm/i915/guc: Extend deregistration fence to schedule disable
  drm/i915: Disable preempt busywait when using GuC scheduling
  drm/i915/guc: Ensure request ordering via completion fences
  drm/i915/guc: Disable semaphores when using GuC scheduling
  drm/i915/guc: Ensure G2H response has space in buffer
  drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC
  drm/i915/guc: Update GuC debugfs to support new GuC
  drm/i915/guc: Add several request trace points
  drm/i915: Add intel_context tracing
  drm/i915/guc: GuC virtual engines
  drm/i915: Hold reference to intel_context over life of i915_request
  drm/i915/guc: Disable bonding extension with GuC submission
  drm/i915/guc: Direct all breadcrumbs for a class to single breadcrumbs
  drm/i915: Add i915_sched_engine destroy vfunc
  drm/i915: Move active request tracking to a vfunc
  drm/i915/guc: Reset implementation for new GuC interface
  drm/i915: Reset GPU immediately if submission is disabled
  drm/i915/guc: Add disable interrupts to guc sanitize
  drm/i915/guc: Suspend/resume implementation for new interface
  drm/i915/guc: Handle context reset notification
  drm/i915/guc: Handle engine reset failure notification
  drm/i915/guc: Enable the timer expired interrupt for GuC
  drm/i915/guc: Capture error state on context reset
  drm/i915/guc: Implement banned contexts for GuC submission
  drm/i915/guc: Support request cancellation
  drm/i915/selftest: Increase some timeouts in live_requests
  drm/i915/guc: Implement GuC priority management

Rahul Kumar Singh (2):
  drm/i915/selftest: Fix workarounds selftest for GuC submission
  drm/i915/selftest: Fix MOCS selftest for GuC submission

 drivers/gpu/drm/i915/Makefile |1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   21 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |3 +-
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c  |6 +-
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   |   44 +-
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.h   |   16 +-
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |7 +
 drivers/gpu/drm/i915/gt/intel_context.c   |   50 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |   50 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   63 +-
 drivers/gpu/drm/i915/gt/intel_engine.h|   54 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  182 +-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   71 +-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |4 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   13 +-
 drivers/gpu/drm/i915/gt/intel_engine_user.c   |4 +
 .../drm/i915/gt/intel_execlists_submission.c  |   95 +-
 .../drm/i915/gt/intel_execlists_submission.h  |4 -
 

Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Jason Ekstrand
On Fri, Jul 16, 2021 at 1:45 PM Matthew Auld
 wrote:
>
> On Fri, 16 Jul 2021 at 18:39, Jason Ekstrand  wrote:
> >
> > On Fri, Jul 16, 2021 at 11:00 AM Matthew Auld
> >  wrote:
> > >
> > > On Fri, 16 Jul 2021 at 16:52, Matthew Auld
> > >  wrote:
> > > >
> > > > On Fri, 16 Jul 2021 at 15:10, Jason Ekstrand  
> > > > wrote:
> > > > >
> > > > > On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
> > > > >  wrote:
> > > > > >
> > > > > > On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  
> > > > > > wrote:
> > > > > > >
> > > > > > > Whenever we had a user object (n_placements > 0), we were ignoring
> > > > > > > obj->mm.region and always putting obj->placements[0] as the 
> > > > > > > requested
> > > > > > > region.  For LMEM+SMEM objects, this was causing them to get 
> > > > > > > shoved into
> > > > > > > LMEM on every i915_ttm_get_pages() even when SMEM was requested 
> > > > > > > by, say,
> > > > > > > i915_gem_object_migrate().
> > > > > >
> > > > > > i915_ttm_migrate calls i915_ttm_place_from_region() directly with 
> > > > > > the
> > > > > > requested region, so there shouldn't be an issue with migration 
> > > > > > right?
> > > > > > Do you have some more details?
> > > > >
> > > > > With i915_ttm_migrate directly, no.  But, in the last patch in the
> > > > > series, we're trying to migrate LMEM+SMEM buffers into SMEM on
> > > > > attach() and pin it there.  This blows up in a very unexpected (IMO)
> > > > > way.  The flow goes something like this:
> > > > >
> > > > >  - Client attempts a dma-buf import from another device
> > > > >  - In attach() we call i915_gem_object_migrate() which calls
> > > > > i915_ttm_migrate() which migrates as requested.
> > > > >  - Once the migration is complete, we call i915_gem_object_pin_pages()
> > > > > which calls i915_ttm_get_pages() which depends on
> > > > > i915_ttm_placement_from_obj() and so migrates it right back to LMEM.
> > > >
> > > > The mm.pages must be NULL here, otherwise it would just increment the
> > > > pages_pin_count?
> >
> > Given that the test is using the four_underscores version, it
> > doesn't have that check.  However, this executes after we've done the
> > dma-buf import which pinned pages.  So we should definitely have
> > pages.
>
> We shouldn't call four_underscores() if we might already have
> pages though. Under non-TTM that would leak the pages, and in TTM we
> might hit the WARN_ON(mm->pages) in __i915_ttm_get_pages(), if for
> example nothing was moved. I take it we can't just call pin_pages()?
> Four scary underscores usually means "don't call this in normal code".

I've switched the four_underscores call to a __two_underscores in
the selftests and it had no effect, good or bad.  But, still, probably
better to call that one.

> >
> > > > >
> > > > > Maybe the problem here is actually that our TTM code isn't respecting
> > > > > obj->mm.pages_pin_count?
> > > >
> > > > I think if the resource is moved, we always nuke the mm.pages after
> > > > being notified of the move. Also TTM is also not allowed to move
> > > > pinned buffers.
> > > >
> > > > I guess if we are evicted/swapped, so assuming we are not holding the
> > > > object lock, and it's not pinned, the future call to get_pages() will
> > > > see mm.pages = NULL, even though the ttm_resource is still there, and
> > > > because we prioritise the placements[0], instead of mm.region we end
> > > > up moving it for no good reason. But in your case you are holding the
> > > > lock, or it's pinned? Also is this just with the selftest, or
> > > > something real?
> > >
> > > Or at least in the selftest I see i915_gem_object_get_pages()
> > > which doesn't even consider the mm.pages AFAIK.
> >
> > The bogus migration is happening as part of the
> > __i915_gem_object_get_pages() (2 __underscores) call in
> > i915_gem_dmabuf_attach (see last patch).  That code is attempting to
> > migrate the BO to SMEM and then pin it there using the obvious calls
> > to do so.  However, in the pin_pages call, it gets implicitly migrated
> > back to LMEM thanks to i915_ttm_get_pages().  Why is _get_pages()
> > migrating things at all?
>
> Not sure yet, but __two_underscores() checks if
> i915_gem_object_has_pages() before actually calling into
> i915_ttm_get_pages(), so the mm.pages would have to be NULL here for
> some reason, so best guess is something to do with move_notify().

Did a bit of experimenting along those lines and added the following
to the self-test BEFORE the export/import:

i915_gem_object_lock(obj, NULL);
err = __i915_gem_object_get_pages(obj);
__i915_gem_object_unpin_pages(obj);
i915_gem_object_unlock(obj);
if (err) {
pr_err("__i915_gem_object_get_pages failed with err=%d\n", err);
goto out_ret;
}

This seems to make the migration happen as expected without this
patch.  So it seems the problem only exists on buffers that haven't
gotten any backing storage yet (if I'm understanding get_pages
correctly).

One potential work-around 

Re: [git pull] drm fixes for 5.14-rc2

2021-07-16 Thread pr-tracker-bot
The pull request you sent on Fri, 16 Jul 2021 13:41:18 +1000:

> git://anongit.freedesktop.org/drm/drm tags/drm-fixes-2021-07-16

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/786cb0a2f9bba267c8a80caf906b94c76d18f7e8

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


Re: [Intel-gfx] [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 15:14, Jason Ekstrand  wrote:
>
> Whenever we had a user object (n_placements > 0), we were ignoring
> obj->mm.region and always putting obj->placements[0] as the requested
> region.  For LMEM+SMEM objects, this was causing them to get shoved into
> LMEM on every i915_ttm_get_pages() even when SMEM was requested by, say,
> i915_gem_object_migrate().
>
> Signed-off-by: Jason Ekstrand 
> Cc: Thomas Hellström 
> Cc: Matthew Auld 
> Cc: Maarten Lankhorst 

AFAIK makes sense, just a question of properly understanding that
weird migration issue first.

Assuming CI is happy,
Reviewed-by: Matthew Auld 

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 6589411396d3f..8eeb73c7c401c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> drm_i915_gem_object *obj,
> unsigned int i;
>
> placement->num_placement = 1;
> -   i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
> -  obj->mm.region, requested, flags);
> +   i915_ttm_place_from_region(obj->mm.region, requested, flags);
>
> /* Cache this on object? */
> placement->num_busy_placement = num_allowed;
> --
> 2.31.1
>
> ___
> Intel-gfx mailing list
> intel-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/7] drm/i915/gem: Unify user object creation (v2)

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 15:14, Jason Ekstrand  wrote:
>
> Instead of hand-rolling the same three calls in each function, pull them
> into an i915_gem_object_create_user helper.  Apart from re-ordering of
> the placements array ENOMEM check, there should be no functional change.
>
> v2 (Matthew Auld):
>  - Add the call to i915_gem_flush_free_objects() from
>i915_gem_dumb_create() in a separate patch
>  - Move i915_gem_object_alloc() below the simple error checks
>
> Signed-off-by: Jason Ekstrand 
> Cc: Matthew Auld 

If CI is happy,
Reviewed-by: Matthew Auld 


Re: [PATCH 3/7] drm/i915/gem: Call i915_gem_flush_free_objects() in i915_gem_dumb_create()

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 15:14, Jason Ekstrand  wrote:
>
> This doesn't really fix anything serious since the chances of a client
> creating and destroying a mass of dumb BOs is pretty low.  However, it
> is called by the other two create IOCTLs to garbage collect old objects.
> Call it here too for consistency.
>
> Signed-off-by: Jason Ekstrand 
> Cc: Matthew Auld 
Reviewed-by: Matthew Auld 


Re: [PATCH 2/7] drm/i915/gem: Refactor placement setup for i915_gem_object_create* (v2)

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 15:14, Jason Ekstrand  wrote:
>
> Since we don't allow changing the set of regions after creation, we can
> make ext_set_placements() build up the region set directly in the
> create_ext and assign it to the object later.  This is similar to what
> we did for contexts with the proto-context only simpler because there's
> no funny object shuffling.  This will be used in the next patch to allow
> us to de-duplicate a bunch of code.  Also, since we know the maximum
> number of regions up-front, we can use a fixed-size temporary array for
> the regions.  This simplifies memory management a bit for this new
> delayed approach.
>
> v2 (Matthew Auld):
>  - Get rid of MAX_N_PLACEMENTS
>  - Drop kfree(placements) from set_placements()
>
> Signed-off-by: Jason Ekstrand 
> Cc: Matthew Auld 

If CI is happy,
Reviewed-by: Matthew Auld 


Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 18:39, Jason Ekstrand  wrote:
>
> On Fri, Jul 16, 2021 at 11:00 AM Matthew Auld
>  wrote:
> >
> > On Fri, 16 Jul 2021 at 16:52, Matthew Auld
> >  wrote:
> > >
> > > On Fri, 16 Jul 2021 at 15:10, Jason Ekstrand  wrote:
> > > >
> > > > On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
> > > >  wrote:
> > > > >
> > > > > On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  
> > > > > wrote:
> > > > > >
> > > > > > Whenever we had a user object (n_placements > 0), we were ignoring
> > > > > > obj->mm.region and always putting obj->placements[0] as the 
> > > > > > requested
> > > > > > region.  For LMEM+SMEM objects, this was causing them to get shoved 
> > > > > > into
> > > > > > LMEM on every i915_ttm_get_pages() even when SMEM was requested by, 
> > > > > > say,
> > > > > > i915_gem_object_migrate().
> > > > >
> > > > > i915_ttm_migrate calls i915_ttm_place_from_region() directly with the
> > > > > requested region, so there shouldn't be an issue with migration right?
> > > > > Do you have some more details?
> > > >
> > > > With i915_ttm_migrate directly, no.  But, in the last patch in the
> > > > series, we're trying to migrate LMEM+SMEM buffers into SMEM on
> > > > attach() and pin it there.  This blows up in a very unexpected (IMO)
> > > > way.  The flow goes something like this:
> > > >
> > > >  - Client attempts a dma-buf import from another device
> > > >  - In attach() we call i915_gem_object_migrate() which calls
> > > > i915_ttm_migrate() which migrates as requested.
> > > >  - Once the migration is complete, we call i915_gem_object_pin_pages()
> > > > which calls i915_ttm_get_pages() which depends on
> > > > i915_ttm_placement_from_obj() and so migrates it right back to LMEM.
> > >
> > > The mm.pages must be NULL here, otherwise it would just increment the
> > > pages_pin_count?
>
> Given that the test is using the four_underscores version, it
> doesn't have that check.  However, this executes after we've done the
> dma-buf import which pinned pages.  So we should definitely have
> pages.

We shouldn't call four_underscores() if we might already have
pages though. Under non-TTM that would leak the pages, and in TTM we
might hit the WARN_ON(mm->pages) in __i915_ttm_get_pages(), if for
example nothing was moved. I take it we can't just call pin_pages()?
Four scary underscores usually means "don't call this in normal code".

>
> > > >
> > > > Maybe the problem here is actually that our TTM code isn't respecting
> > > > obj->mm.pages_pin_count?
> > >
> > > I think if the resource is moved, we always nuke the mm.pages after
> > > being notified of the move. Also TTM is also not allowed to move
> > > pinned buffers.
> > >
> > > I guess if we are evicted/swapped, so assuming we are not holding the
> > > object lock, and it's not pinned, the future call to get_pages() will
> > > see mm.pages = NULL, even though the ttm_resource is still there, and
> > > because we prioritise the placements[0], instead of mm.region we end
> > > up moving it for no good reason. But in your case you are holding the
> > > lock, or it's pinned? Also is this just with the selftest, or
> > > something real?
> >
> > Or at least in the selftest I see i915_gem_object_get_pages()
> > which doesn't even consider the mm.pages AFAIK.
>
> The bogus migration is happening as part of the
> __i915_gem_object_get_pages() (2 __underscores) call in
> i915_gem_dmabuf_attach (see last patch).  That code is attempting to
> migrate the BO to SMEM and then pin it there using the obvious calls
> to do so.  However, in the pin_pages call, it gets implicitly migrated
> back to LMEM thanks to i915_ttm_get_pages().  Why is _get_pages()
> migrating things at all?

Not sure yet, but __two_underscores() checks if
i915_gem_object_has_pages() before actually calling into
i915_ttm_get_pages(), so the mm.pages would have to be NULL here for
some reason, so best guess is something to do with move_notify().

>
> --Jason
>
> > >
> > > >
> > > > In case you can't tell, I really have no clue what I'm doing here.
> > > > I'm really stumbling around in the dark finding things that make my
> > > > bug go away.  I'm happy for the feedback.
> > > >
> > > > --Jason
> > > >
> > > > >
> > > > > >
> > > > > > Signed-off-by: Jason Ekstrand 
> > > > > > Cc: Thomas Hellström 
> > > > > > Cc: Matthew Auld 
> > > > > > Cc: Maarten Lankhorst 
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
> > > > > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> > > > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > > index d30f274c329c7..5985e994d56cf 100644
> > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > > @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> > > > > > drm_i915_gem_object *obj,
> > > > > > unsigned int i;
> > > > > >
> > > > > > 

[PATCH] drm/lima: Convert to clk_bulk API

2021-07-16 Thread Marek Vasut
Instead of requesting two separate clock and then handling them
separately in various places of the driver, use clk_bulk_*() API.
This permits handling devices with more than "bus"/"core" clock,
like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
clock.

Signed-off-by: Marek Vasut 
Cc: Qiang Yu 
Cc: l...@lists.freedesktop.org
---
 drivers/gpu/drm/lima/lima_devfreq.c | 17 +---
 drivers/gpu/drm/lima/lima_devfreq.h |  1 +
 drivers/gpu/drm/lima/lima_device.c  | 42 +++--
 drivers/gpu/drm/lima/lima_device.h  |  4 +--
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/lima/lima_devfreq.c 
b/drivers/gpu/drm/lima/lima_devfreq.c
index 8989e215dfc9..533b36932f79 100644
--- a/drivers/gpu/drm/lima/lima_devfreq.c
+++ b/drivers/gpu/drm/lima/lima_devfreq.c
@@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
	struct lima_devfreq *devfreq = &ldev->devfreq;
unsigned long irqflags;
 
-   status->current_frequency = clk_get_rate(ldev->clk_gpu);
+   status->current_frequency = clk_get_rate(devfreq->clk_gpu);
 
	spin_lock_irqsave(&devfreq->lock, irqflags);
 
@@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
	struct lima_devfreq *ldevfreq = &ldev->devfreq;
struct dev_pm_opp *opp;
unsigned long cur_freq;
-   int ret;
+   int i, ret;
 
if (!device_property_present(dev, "operating-points-v2"))
/* Optional, continue without devfreq */
return 0;
 
+   /* Find the first clock that is not the "bus" clock */
+   for (i = 0; i < ldev->nr_clks; i++) {
+   if (!strcmp(ldev->clks[i].id, "bus"))
+   continue;
+   ldevfreq->clk_gpu = ldev->clks[i].clk;
+   break;
+   }
+
+   if (!ldevfreq->clk_gpu)
+   return -ENODEV;
+
	spin_lock_init(&ldevfreq->lock);
 
ret = devm_pm_opp_set_clkname(dev, "core");
@@ -135,7 +146,7 @@ int lima_devfreq_init(struct lima_device *ldev)
 
lima_devfreq_reset(ldevfreq);
 
-   cur_freq = clk_get_rate(ldev->clk_gpu);
+   cur_freq = clk_get_rate(ldevfreq->clk_gpu);
 
	opp = devfreq_recommended_opp(dev, &cur_freq, 0);
if (IS_ERR(opp))
diff --git a/drivers/gpu/drm/lima/lima_devfreq.h 
b/drivers/gpu/drm/lima/lima_devfreq.h
index b8e50feaeab6..ffef5c91795d 100644
--- a/drivers/gpu/drm/lima/lima_devfreq.h
+++ b/drivers/gpu/drm/lima/lima_devfreq.h
@@ -17,6 +17,7 @@ struct lima_devfreq {
struct devfreq *devfreq;
struct thermal_cooling_device *cooling;
struct devfreq_simple_ondemand_data gov_data;
+   struct clk *clk_gpu;
 
ktime_t busy_time;
ktime_t idle_time;
diff --git a/drivers/gpu/drm/lima/lima_device.c 
b/drivers/gpu/drm/lima/lima_device.c
index 65fdca366e41..9f7bde7e9d22 100644
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -85,29 +85,23 @@ static int lima_clk_enable(struct lima_device *dev)
 {
int err;
 
-   err = clk_prepare_enable(dev->clk_bus);
+   err = clk_bulk_prepare_enable(dev->nr_clks, dev->clks);
if (err)
return err;
 
-   err = clk_prepare_enable(dev->clk_gpu);
-   if (err)
-   goto error_out0;
-
if (dev->reset) {
err = reset_control_deassert(dev->reset);
if (err) {
dev_err(dev->dev,
"reset controller deassert failed %d\n", err);
-   goto error_out1;
+   goto error;
}
}
 
return 0;
 
-error_out1:
-   clk_disable_unprepare(dev->clk_gpu);
-error_out0:
-   clk_disable_unprepare(dev->clk_bus);
+error:
+   clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
return err;
 }
 
@@ -115,31 +109,23 @@ static void lima_clk_disable(struct lima_device *dev)
 {
if (dev->reset)
reset_control_assert(dev->reset);
-   clk_disable_unprepare(dev->clk_gpu);
-   clk_disable_unprepare(dev->clk_bus);
+   clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
 }
 
 static int lima_clk_init(struct lima_device *dev)
 {
int err;
 
-   dev->clk_bus = devm_clk_get(dev->dev, "bus");
-   if (IS_ERR(dev->clk_bus)) {
-   err = PTR_ERR(dev->clk_bus);
	err = devm_clk_bulk_get_all(dev->dev, &dev->clks);
+   if (err < 1) {
+   if (err == 0)   /* No clock at all is an error too */
+   err = -ENODEV;
if (err != -EPROBE_DEFER)
-   dev_err(dev->dev, "get bus clk failed %d\n", err);
-   dev->clk_bus = NULL;
+   dev_err(dev->dev, "get clk failed %d\n", err);
return err;
}
 
-   dev->clk_gpu = devm_clk_get(dev->dev, "core");
-   if (IS_ERR(dev->clk_gpu)) {
-   err = PTR_ERR(dev->clk_gpu);
-   if (err != 

Re: [PATCH 7/7] drm/st7586: Use framebuffer dma-buf helpers

2021-07-16 Thread David Lechner

On 7/16/21 9:08 AM, Thomas Zimmermann wrote:

Replace dma_buf_begin_cpu_access() with drm_gem_fb_begin_cpu_access();
same for _end_cpu_access(). Remove some boiler-plate code. No functional
changes.

Signed-off-by: Thomas Zimmermann 
---


Acked-by: David Lechner 




Re: [PATCH v2] dt-bindings: display: renesas,du: Make resets optional on R-Car H1

2021-07-16 Thread Rob Herring
On Wed, 14 Jul 2021 12:19:36 +0200, Geert Uytterhoeven wrote:
> The "resets" property is not present on R-Car Gen1 SoCs.
> Supporting it would require migrating from renesas,cpg-clocks to
> renesas,cpg-mssr.
> 
> Reflect this in the DT bindings by removing the global "required:
> resets".  All SoCs that do have "resets" properties already have
> SoC-specific rules making it required.
> 
> Fixes: 99d66127fad25ebb ("dt-bindings: display: renesas,du: Convert binding 
> to YAML")
> Signed-off-by: Geert Uytterhoeven 
> Reviewed-by: Laurent Pinchart 
> ---
> v2:
>   - Add Reviewed-by.
> ---
>  Documentation/devicetree/bindings/display/renesas,du.yaml | 1 -
>  1 file changed, 1 deletion(-)
> 

Applied, thanks!


Re: [PATCH 08/16] drm/i915/guc/slpc: Add methods to set min/max frequency

2021-07-16 Thread Belgaumkar, Vinay




On 7/10/2021 10:47 AM, Michal Wajdeczko wrote:



On 10.07.2021 03:20, Vinay Belgaumkar wrote:

Add param set h2g helpers to set the min and max frequencies
for use by SLPC.

Signed-off-by: Sundaresan Sujaritha 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 94 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h |  2 +
  2 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index e579408d1c19..19cb26479942 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -106,6 +106,19 @@ static int slpc_send(struct intel_guc_slpc *slpc,
return intel_guc_send(guc, action, in_len);
  }
  
+static int host2guc_slpc_set_param(struct intel_guc_slpc *slpc,

+  u32 id, u32 value)
+{
+   struct slpc_event_input data = {0};
+
+   data.header.value = SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2);
+   data.args[0] = id;
+   data.args[1] = value;
+
+   return slpc_send(slpc, &data, 4);


as suggested before, use of explicit function like:

static int guc_action_slpc_param(guc, u32 id, u32 value)
{
u32 request[] = {
INTEL_GUC_ACTION_SLPC_REQUEST,
SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
id,
value,
};

return intel_guc_send(guc, request, ARRAY_SIZE(request));
}

will be simpler/cleaner


done.




+}
+
+
  static bool slpc_running(struct intel_guc_slpc *slpc)
  {
struct slpc_shared_data *data;
@@ -134,6 +147,19 @@ static int host2guc_slpc_query_task_state(struct 
intel_guc_slpc *slpc)
	return slpc_send(slpc, &data, 4);
  }
  
+static int slpc_set_param(struct intel_guc_slpc *slpc, u32 id, u32 value)

+{
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+   if (host2guc_slpc_set_param(slpc, id, value)) {
+   drm_err(&i915->drm, "Unable to set param %x", id);


missing \n
what about printing value to be set ?
what about printing send error %pe ?


done.




+   return -EIO;
+   }
+
+   return 0;
+}
+
  static int slpc_read_task_state(struct intel_guc_slpc *slpc)
  {
return host2guc_slpc_query_task_state(slpc);
@@ -218,6 +244,74 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
return slpc_shared_data_init(slpc);
  }
  
+/**

+ * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: encoded frequency


what's the encoding ?


It should just be frequency (MHz).




+ *
+ * This function will invoke GuC SLPC action to update the max frequency
+ * limit for slice and unslice.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct drm_i915_private *i915 = slpc_to_i915(slpc);
+   intel_wakeref_t wakeref;
+
+   wakeref = intel_runtime_pm_get(&i915->runtime_pm);


use can use with_intel_runtime_pm(rpm, wakeref)


Ok.



+
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+  val);
+
+   if (ret) {
+   drm_err(>drm,
+   "Set max frequency unslice returned %d", ret);


missing \n
print error with %pe
but slpc_set_param returns only -EIO ;(


It was done that way so the sysfs method that calls it gets a standard 
value. Will change that.





+   ret = -EIO;
+   goto done;
+   }
+
+done:
+   intel_runtime_pm_put(>runtime_pm, wakeref);
+   return ret;
+}
+
+/**
+ * intel_guc_slpc_min_freq_set() - Set min frequency limit for SLPC.
+ * @slpc: pointer to intel_guc_slpc.
+ * @val: encoded frequency
+ *
+ * This function will invoke GuC SLPC action to update the min frequency
+ * limit.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+   int ret;
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+   intel_wakeref_t wakeref;
+
+   wakeref = intel_runtime_pm_get(>runtime_pm);
+
+   ret = slpc_set_param(slpc,
+  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+  val);
+   if (ret) {
+   drm_err(>drm,
+   "Set min frequency for unslice returned %d", ret);


as above


done.
Thanks,

Vinay.


Michal


+   ret = -EIO;
+   goto done;
+   }
+
+done:
+   intel_runtime_pm_put(>runtime_pm, wakeref);
+   return ret;
+}
+
  /*
   * intel_guc_slpc_enable() - Start SLPC
   * @slpc: pointer to intel_guc_slpc.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 

Re: [PATCH v2 3/7] dt-bindings: display: mediatek: add dsi reset optional property

2021-07-16 Thread Rob Herring
On Wed, 14 Jul 2021 12:11:37 +0200, Enric Balletbo i Serra wrote:
> Update device tree binding documentation for the dsi to add the optional
> property to reset the dsi controller.
> 
> Signed-off-by: Enric Balletbo i Serra 
> ---
> 
> Changes in v2:
> - Added a new patch to describe the dsi reset optional property.
> 
>  .../devicetree/bindings/display/mediatek/mediatek,dsi.txt   | 6 ++
>  1 file changed, 6 insertions(+)
> 

Acked-by: Rob Herring 


Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Jason Ekstrand
On Fri, Jul 16, 2021 at 11:00 AM Matthew Auld
 wrote:
>
> On Fri, 16 Jul 2021 at 16:52, Matthew Auld
>  wrote:
> >
> > On Fri, 16 Jul 2021 at 15:10, Jason Ekstrand  wrote:
> > >
> > > On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
> > >  wrote:
> > > >
> > > > On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  
> > > > wrote:
> > > > >
> > > > > Whenever we had a user object (n_placements > 0), we were ignoring
> > > > > obj->mm.region and always putting obj->placements[0] as the requested
> > > > > region.  For LMEM+SMEM objects, this was causing them to get shoved 
> > > > > into
> > > > > LMEM on every i915_ttm_get_pages() even when SMEM was requested by, 
> > > > > say,
> > > > > i915_gem_object_migrate().
> > > >
> > > > i915_ttm_migrate calls i915_ttm_place_from_region() directly with the
> > > > requested region, so there shouldn't be an issue with migration right?
> > > > Do you have some more details?
> > >
> > > With i915_ttm_migrate directly, no.  But, in the last patch in the
> > > series, we're trying to migrate LMEM+SMEM buffers into SMEM on
> > > attach() and pin it there.  This blows up in a very unexpected (IMO)
> > > way.  The flow goes something like this:
> > >
> > >  - Client attempts a dma-buf import from another device
> > >  - In attach() we call i915_gem_object_migrate() which calls
> > > i915_ttm_migrate() which migrates as requested.
> > >  - Once the migration is complete, we call i915_gem_object_pin_pages()
> > > which calls i915_ttm_get_pages() which depends on
> > > i915_ttm_placement_from_obj() and so migrates it right back to LMEM.
> >
> > The mm.pages must be NULL here, otherwise it would just increment the
> > pages_pin_count?

Given that the test is using the four_underscores version, it
doesn't have that check.  However, this executes after we've done the
dma-buf import which pinned pages.  So we should definitely have
pages.

> > >
> > > Maybe the problem here is actually that our TTM code isn't respecting
> > > obj->mm.pages_pin_count?
> >
> > I think if the resource is moved, we always nuke the mm.pages after
> > being notified of the move. Also TTM is also not allowed to move
> > pinned buffers.
> >
> > I guess if we are evicted/swapped, so assuming we are not holding the
> > object lock, and it's not pinned, the future call to get_pages() will
> > see mm.pages = NULL, even though the ttm_resource is still there, and
> > because we prioritise the placements[0], instead of mm.region we end
> > up moving it for no good reason. But in your case you are holding the
> > lock, or it's pinned? Also is this just with the selftest, or
> > something real?
>
> Or at least in the selftest I see i915_gem_object_get_pages()
> which doesn't even consider the mm.pages AFAIK.

The bogus migration is happening as part of the
__i915_gem_object_get_pages() (2 __underscores) call in
i915_gem_dmabuf_attach (see last patch).  That code is attempting to
migrate the BO to SMEM and then pin it there using the obvious calls
to do so.  However, in the pin_pages call, it gets implicitly migrated
back to LMEM thanks to i915_ttm_get_pages().  Why is _get_pages()
migrating things at all?

--Jason

> >
> > >
> > > In case you can't tell, I really have no clue what I'm doing here.
> > > I'm really stumbling around in the dark finding things that make my
> > > bug go away.  I'm happy for the feedback.
> > >
> > > --Jason
> > >
> > > >
> > > > >
> > > > > Signed-off-by: Jason Ekstrand 
> > > > > Cc: Thomas Hellström 
> > > > > Cc: Matthew Auld 
> > > > > Cc: Maarten Lankhorst 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
> > > > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> > > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > index d30f274c329c7..5985e994d56cf 100644
> > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > > @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> > > > > drm_i915_gem_object *obj,
> > > > > unsigned int i;
> > > > >
> > > > > placement->num_placement = 1;
> > > > > -   i915_ttm_place_from_region(num_allowed ? 
> > > > > obj->mm.placements[0] :
> > > > > -  obj->mm.region, requested, flags);
> > > > > +   i915_ttm_place_from_region(obj->mm.region, requested, flags);
> > > > >
> > > > > /* Cache this on object? */
> > > > > placement->num_busy_placement = num_allowed;
> > > > > --
> > > > > 2.31.1
> > > > >


Re: [PATCH] drm/amd/display: Fix identical code for different branches

2021-07-16 Thread Len Baker
On Sun, Jul 11, 2021 at 10:45:48AM -0700, Joe Perches wrote:
> On Sun, 2021-07-11 at 19:24 +0200, Len Baker wrote:
> > The branches of the "if" statement are the same. So remove the
> > unnecessary if and goto statements.
> >
> > Addresses-Coverity-ID: 1456916 ("Identical code for different branches")
> > Fixes: 4c283fdac08ab ("drm/amd/display: Add HDCP module")
> > Signed-off-by: Len Baker 
>
> I'm not a big fan of this type of change.
>
> It's currently the same style used for six tests in this function
> and changing this last one would just make it harder to see the
> code blocks as consistent.
>
> I doubt any reasonable compiler would produce different objects.

Ok, thanks for the review. I'll leave it as is.

> > diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c 
> > b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
> []
> > @@ -305,10 +305,8 @@ static enum mod_hdcp_status wait_for_ready(struct 
> > mod_hdcp *hdcp,
> >     hdcp, "bcaps_read"))
> >     goto out;
> >     }
> > -   if (!mod_hdcp_execute_and_set(check_ksv_ready,
> > -   >ready_check, ,
> > -   hdcp, "ready_check"))
> > -   goto out;
> > +   mod_hdcp_execute_and_set(check_ksv_ready, >ready_check, ,
> > +hdcp, "ready_check");
> >  out:
> >     return status;
> >  }
> > --
> > 2.25.1
> >

Thanks,
Len


Re: [PATCH] drm/amd/display: Fix 10bit 4K display on CIK GPUs

2021-07-16 Thread Alex Deucher
Applied.  Thanks!

Alex

On Thu, Jul 15, 2021 at 3:40 PM Harry Wentland  wrote:
>
>
>
> On 2021-07-15 3:19 p.m., Mario Kleiner wrote:
> > On Thu, Jul 15, 2021 at 6:10 PM Alex Deucher  wrote:
> >>
> >> On Wed, Jul 14, 2021 at 4:15 AM Liviu Dudau  wrote:
> >>>
> >>> Commit 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at
> >>> 30bpp for DCE-11.0.") doesn't seems to have fixed 10bit 4K rendering over
> >>> DisplayPort for CIK GPUs. On my machine with a HAWAII GPU I get a broken
> >>> image that looks like it has an effective resolution of 1920x1080 but
> >>> scaled up in an irregular way. Reverting the commit or applying this
> >>> patch fixes the problem on v5.14-rc1.
> >>>
> >>> Fixes: 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at 
> >>> 30bpp for DCE-11.0.")
> >>> Signed-off-by: Liviu Dudau 
> >>
> >> Harry or Mario any ideas?  Maybe we need finer grained DCE version
> >> checking?  I don't remember all of the caveats of this stuff.  DCE11
> >> and older is getting to be pretty old at this point.  I can just apply
> >> this if you don't have any insights.
> >>
> >> Alex
> >>
> >
> > Hi Alex
> >
> > I'd be fine with applying this. As my original commit says, photometer
> > measurements showed that increasing the line buffer depth was only
> > needed for my DCN-1 RavenRidge, not for my DCE-11.2 Polaris11 or a
> > DCE-8.3 cik, so this should probably not cause harm to the increased
> > precision modes.
> >
> > Note that given the hardware and USB-C/DP-HDMI adapters i have, I only
> > tested this on a 2560x1440@144 Hz DP monitor with DCN-1, DCE-11.2, and
> > a 2560x1440@100 Hz HDMI monitor iirc with DCN-1, DCE-8.3, and i think
> > on a 2880x1800@60 Hz MBP Retina eDP panel with DCE-11.2. These are the
> > highest resolution/framerate monitors I have atm. I don't have access
> > to any 4k monitors, so maybe the problem is somehow specific to such
> > high resolutions? Maybe somewhere else in the code something would
> > need to be adapted? Lacking actual hw docs, my coding here is by
> > pattern matching against existing DC code, guessing and testing on my
> > limited hw samples.
> >
> > Acked-by: Mario Kleiner 
>
> Makes sense.
>
> Reviewed-by: Harry Wentland 
>
> Harry
>
> >
> > -mario
> >
> >>> ---
> >>>  drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +-
> >>>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
> >>> b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> >>> index a6a67244a322e..1596f6b7fed7c 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> >>> @@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx 
> >>> *pipe_ctx)
> >>>  * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 
> >>> and 8.3
> >>>  * did not show such problems, so this seems to be the exception.
> >>>  */
> >>> -   if (plane_state->ctx->dce_version != DCE_VERSION_11_0)
> >>> +   if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
> >>> pipe_ctx->plane_res.scl_data.lb_params.depth = 
> >>> LB_PIXEL_DEPTH_36BPP;
> >>> else
> >>> pipe_ctx->plane_res.scl_data.lb_params.depth = 
> >>> LB_PIXEL_DEPTH_30BPP;
> >>> --
> >>> 2.32.0
> >>>
> >>> ___
> >>> amd-gfx mailing list
> >>> amd-...@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx>


Re: [PATCH v1] mailbox: cmdq: add instruction time-out interrupt support

2021-07-16 Thread Chun-Kuang Hu
Hi, Yongqiang:

Yongqiang Niu  於 2021年7月16日 週五 下午2:58寫道:
>
> add time-out cycle setting to make sure time-out interrupt irq
> will happened when instruction time-out for wait and poll
>
> Signed-off-by: Yongqiang Niu 
> ---
>  drivers/mailbox/mtk-cmdq-mailbox.c | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c 
> b/drivers/mailbox/mtk-cmdq-mailbox.c
> index de4793e..9a76bcd 100644
> --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> @@ -35,6 +35,7 @@
>  #define CMDQ_THR_END_ADDR  0x24
>  #define CMDQ_THR_WAIT_TOKEN0x30
>  #define CMDQ_THR_PRIORITY  0x40
> +#define CMDQ_THR_INSTN_TIMEOUT_CYCLES  0x50
>
>  #define GCE_GCTL_VALUE 0x48
>
> @@ -53,6 +54,15 @@
>  #define CMDQ_JUMP_BY_OFFSET0x1000
>  #define CMDQ_JUMP_BY_PA0x1001
>
> +/*
> + * instruction time-out
> + * cycles to issue instruction time-out interrupt for wait and poll 
> instructions
> + * GCE axi_clock 156MHz
> + * 1 cycle = 6.41ns
> + * instruction time out 2^22*2*6.41ns = 53ms

I think every client has different timeout value, so it's not a good
idea to have a unique timeout value in mailbox controller. Client
could use timer or something similar to detect timeout.

Regards,
Chun-Kuang.

> + */
> +#define CMDQ_INSTN_TIMEOUT_CYCLES  22
> +
>  struct cmdq_thread {
> struct mbox_chan*chan;
> void __iomem*base;
> @@ -368,6 +378,7 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, 
> void *data)
> writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->shift_pa,
>thread->base + CMDQ_THR_END_ADDR);
>
> +   writel(CMDQ_INSTN_TIMEOUT_CYCLES, thread->base + 
> CMDQ_THR_INSTN_TIMEOUT_CYCLES);
> writel(thread->priority, thread->base + CMDQ_THR_PRIORITY);
> writel(CMDQ_THR_IRQ_EN, thread->base + CMDQ_THR_IRQ_ENABLE);
> writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK);
> --
> 1.8.1.1.dirty
>


Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 16:52, Matthew Auld
 wrote:
>
> On Fri, 16 Jul 2021 at 15:10, Jason Ekstrand  wrote:
> >
> > On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
> >  wrote:
> > >
> > > On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  wrote:
> > > >
> > > > Whenever we had a user object (n_placements > 0), we were ignoring
> > > > obj->mm.region and always putting obj->placements[0] as the requested
> > > > region.  For LMEM+SMEM objects, this was causing them to get shoved into
> > > > LMEM on every i915_ttm_get_pages() even when SMEM was requested by, say,
> > > > i915_gem_object_migrate().
> > >
> > > i915_ttm_migrate calls i915_ttm_place_from_region() directly with the
> > > requested region, so there shouldn't be an issue with migration right?
> > > Do you have some more details?
> >
> > With i915_ttm_migrate directly, no.  But, in the last patch in the
> > series, we're trying to migrate LMEM+SMEM buffers into SMEM on
> > attach() and pin it there.  This blows up in a very unexpected (IMO)
> > way.  The flow goes something like this:
> >
> >  - Client attempts a dma-buf import from another device
> >  - In attach() we call i915_gem_object_migrate() which calls
> > i915_ttm_migrate() which migrates as requested.
> >  - Once the migration is complete, we call i915_gem_object_pin_pages()
> > which calls i915_ttm_get_pages() which depends on
> > i915_ttm_placement_from_obj() and so migrates it right back to LMEM.
>
> The mm.pages must be NULL here, otherwise it would just increment the
> pages_pin_count?
>
> >
> > Maybe the problem here is actually that our TTM code isn't respecting
> > obj->mm.pages_pin_count?
>
> I think if the resource is moved, we always nuke the mm.pages after
> being notified of the move. Also TTM is also not allowed to move
> pinned buffers.
>
> I guess if we are evicted/swapped, so assuming we are not holding the
> object lock, and it's not pinned, the future call to get_pages() will
> see mm.pages = NULL, even though the ttm_resource is still there, and
> because we prioritise the placements[0], instead of mm.region we end
> up moving it for no good reason. But in your case you are holding the
> lock, or it's pinned? Also is this just with the selftest, or
> something real?

Or at least in the selftest I see i915_gem_object_get_pages()
which doesn't even consider the mm.pages AFAIK.

>
> >
> > In case you can't tell, I really have no clue what I'm doing here.
> > I'm really stumbling around in the dark finding things that make my
> > bug go away.  I'm happy for the feedback.
> >
> > --Jason
> >
> > >
> > > >
> > > > Signed-off-by: Jason Ekstrand 
> > > > Cc: Thomas Hellström 
> > > > Cc: Matthew Auld 
> > > > Cc: Maarten Lankhorst 
> > > > ---
> > > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
> > > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > index d30f274c329c7..5985e994d56cf 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> > > > drm_i915_gem_object *obj,
> > > > unsigned int i;
> > > >
> > > > placement->num_placement = 1;
> > > > -   i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
> > > > -  obj->mm.region, requested, flags);
> > > > +   i915_ttm_place_from_region(obj->mm.region, requested, flags);
> > > >
> > > > /* Cache this on object? */
> > > > placement->num_busy_placement = num_allowed;
> > > > --
> > > > 2.31.1
> > > >


Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Matthew Auld
On Fri, 16 Jul 2021 at 15:10, Jason Ekstrand  wrote:
>
> On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
>  wrote:
> >
> > On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  wrote:
> > >
> > > Whenever we had a user object (n_placements > 0), we were ignoring
> > > obj->mm.region and always putting obj->placements[0] as the requested
> > > region.  For LMEM+SMEM objects, this was causing them to get shoved into
> > > LMEM on every i915_ttm_get_pages() even when SMEM was requested by, say,
> > > i915_gem_object_migrate().
> >
> > i915_ttm_migrate calls i915_ttm_place_from_region() directly with the
> > requested region, so there shouldn't be an issue with migration right?
> > Do you have some more details?
>
> With i915_ttm_migrate directly, no.  But, in the last patch in the
> series, we're trying to migrate LMEM+SMEM buffers into SMEM on
> attach() and pin it there.  This blows up in a very unexpected (IMO)
> way.  The flow goes something like this:
>
>  - Client attempts a dma-buf import from another device
>  - In attach() we call i915_gem_object_migrate() which calls
> i915_ttm_migrate() which migrates as requested.
>  - Once the migration is complete, we call i915_gem_object_pin_pages()
> which calls i915_ttm_get_pages() which depends on
> i915_ttm_placement_from_obj() and so migrates it right back to LMEM.

The mm.pages must be NULL here, otherwise it would just increment the
pages_pin_count?

>
> Maybe the problem here is actually that our TTM code isn't respecting
> obj->mm.pages_pin_count?

I think if the resource is moved, we always nuke the mm.pages after
being notified of the move. Also TTM is also not allowed to move
pinned buffers.

I guess if we are evicted/swapped, so assuming we are not holding the
object lock, and it's not pinned, the future call to get_pages() will
see mm.pages = NULL, even though the ttm_resource is still there, and
because we prioritise the placements[0], instead of mm.region we end
up moving it for no good reason. But in your case you are holding the
lock, or it's pinned? Also is this just with the selftest, or
something real?

>
> In case you can't tell, I really have no clue what I'm doing here.
> I'm really stumbling around in the dark finding things that make my
> bug go away.  I'm happy for the feedback.
>
> --Jason
>
> >
> > >
> > > Signed-off-by: Jason Ekstrand 
> > > Cc: Thomas Hellström 
> > > Cc: Matthew Auld 
> > > Cc: Maarten Lankhorst 
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
> > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > index d30f274c329c7..5985e994d56cf 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> > > drm_i915_gem_object *obj,
> > > unsigned int i;
> > >
> > > placement->num_placement = 1;
> > > -   i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
> > > -  obj->mm.region, requested, flags);
> > > +   i915_ttm_place_from_region(obj->mm.region, requested, flags);
> > >
> > > /* Cache this on object? */
> > > placement->num_busy_placement = num_allowed;
> > > --
> > > 2.31.1
> > >


Re: [PATCH RFC] drm/vc4: hdmi: Fix connector detect logic

2021-07-16 Thread Maxime Ripard
Hi Stefan,

On Wed, Jul 07, 2021 at 08:01:50PM +0200, Stefan Wahren wrote:
> Hi Maxime,
> 
> Am 07.07.21 um 15:11 schrieb Maxime Ripard:
> > On Tue, Jul 06, 2021 at 05:45:27PM +0200, Stefan Wahren wrote:
> >> Hi Maxime,
> >>
> >> Am 06.07.21 um 15:21 schrieb Maxime Ripard:
> >>> Hi Stefan,
> >>>
> >>> On Tue, Jul 06, 2021 at 12:48:05PM +0200, Stefan Wahren wrote:
>  Am 06.07.21 um 11:58 schrieb Maxime Ripard:
> > Hi,
> >
> > On Mon, Jul 05, 2021 at 11:36:34PM +0200, Stefan Wahren wrote:
> >> Commit "drm/vc4: hdmi: Convert to gpiod" changes the behavior of
> >> vc4_hdmi_connector_detect() which results into CPU hangs in case there
> >> is no HDMI connected. Let's restore the old behavior.
> >>
> >> Reported-by: Nathan Chancellor 
> >> Reported-by: Ojaswin Mujoo 
> >> Fixes: 6800234ceee0 ("drm/vc4: hdmi: Convert to gpiod")
> >> Signed-off-by: Stefan Wahren 
> > I already sent this patch last week:
> >
> > https://lore.kernel.org/dri-devel/20210628124257.140453-3-max...@cerno.tech/
>  oops, i only looked in the July archive.
> 
> > I'm not entirely sure how this could create a CPU hang though. Withouth
> > this patch, if the HPD GPIO is low, we would first try to retrieve the
> > EDID, and then if it doesn't we would read the hotplug register.
>  Yes, the real issue has been revealed by the original change and this
>  patch only "hides" it again.
> > The first is using a separate i2c controller (and even if it was in the
> > same power domain, we have the pm_runtime_resume call), and the register
> > read should be fine too?
>  Sorry, i don't have a clue and time for further investigations.
> 
>  Does it mean, you are not able to reproduce this issue?
> >>> On next-20210706 at least it works fine for me without an HDMI monitor
> >>> connected, yes:
> >> which configuration do you use? Did you tried arm/multi_v7_defconfig?
> >>
> >> I tried yesterday mainline ("a180bd1d7e16173d965b263c5a536aa40afa2a2a")
> >> with multi_v7_defconfig and the issue was there.
> > I can't boot multi_v7_defconfig on my setup, but I just tested multi_v7
> > + a few options (UART, ethernet) built-in to be able to boot, and I
> > can't reproduce what you're seeing. It boots just fine without any
> > monitor attached.
> 
> not sure how do you boot, but USB mass storage boot for Raspberry Pi 3 B
> Plus is broken since Linux 5.13 with multi_v7_defconfig [1]. But this is
> a completely different issue.
> 
> To be more exact the hang in this case happens a few seconds after the
> UART console (ttyS1) becomes available.
> 
> Here is my setup:
> 
> Raspberry Pi 3 Plus
> DTS from mainline tree
> arm/multi_v7_defconfig
> Boot from SD card
> No U-Boot
> Rootfs: Raspberry Pi OS 32bit (May 7th 2021)
> VC4 firmware: 2021-04-30T13:47:07
> 
> Maybe next week, i have a little bit more time

A bit of an update, there's other users that reported it on 5.10, and it
turns out it seems to be (partially at least) related to the options set
in config.txt.

The tracking issue is there:
https://github.com/raspberrypi/linux/issues/4457

It seems like the reason it was working for me all along is that I had
hdmi_force_hotplug set, and it looks like it makes the issue go away.
It's not clear at this point why.

Maxime


signature.asc
Description: PGP signature


Re: [PATCH 4/4] drm/i915/uapi: reject set_domain for discrete

2021-07-16 Thread Jason Ekstrand
On Fri, Jul 16, 2021 at 9:52 AM Tvrtko Ursulin
 wrote:
>
>
> On 15/07/2021 11:15, Matthew Auld wrote:
> > The CPU domain should be static for discrete, and on DG1 we don't need
> > any flushing since everything is already coherent, so really all this
> > does is an object wait, for which we have an ioctl. Longer term the
> > desired caching should be an immutable creation time property for the
> > BO, which can be set with something like gem_create_ext.
> >
> > One other user is iris + userptr, which uses the set_domain to probe all
> > the pages to check if the GUP succeeds, however we now have a PROBE
> > flag for this purpose.
> >
> > v2: add some more kernel doc, also add the implicit rules with caching
> >
> > Suggested-by: Daniel Vetter 
> > Signed-off-by: Matthew Auld 
> > Cc: Thomas Hellström 
> > Cc: Maarten Lankhorst 
> > Cc: Tvrtko Ursulin 
> > Cc: Jordan Justen 
> > Cc: Kenneth Graunke 
> > Cc: Jason Ekstrand 
> > Cc: Daniel Vetter 
> > Cc: Ramalingam C 
> > Reviewed-by: Ramalingam C 
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
> >   include/uapi/drm/i915_drm.h| 19 +++
> >   2 files changed, 22 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > index 43004bef55cb..b684a62bf3b0 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > @@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
> > *data,
> >   u32 write_domain = args->write_domain;
> >   int err;
> >
> > + if (IS_DGFX(to_i915(dev)))
> > + return -ENODEV;
> > +
> >   /* Only handle setting domains to types used by the CPU. */
> >   if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
> >   return -EINVAL;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 2e4112bf4d38..04ce310e7ee6 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -901,6 +901,25 @@ struct drm_i915_gem_mmap_offset {
> >*  - I915_GEM_DOMAIN_GTT: Mappable aperture domain
> >*
> >* All other domains are rejected.
> > + *
> > + * Note that for discrete, starting from DG1, this is no longer supported, 
> > and
> > + * is instead rejected. On such platforms the CPU domain is effectively 
> > static,
> > + * where we also only support a single _i915_gem_mmap_offset cache 
> > mode,
> > + * which can't be set explicitly and instead depends on the object 
> > placements,
> > + * as per the below.
> > + *
> > + * Implicit caching rules, starting from DG1:
> > + *
> > + *   - If any of the object placements (see 
> > _i915_gem_create_ext_memory_regions)
> > + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated 
> > and
> > + * mapped as write-combined only.

Is this accurate?  I thought they got WB when living in SMEM and WC
when on the device.  But, since both are coherent, it's safe to lie to
userspace and say it's all WC.  Is that correct or am I missing
something?

> A note about write-combine buffer? I guess saying it is userspace
> responsibility to do it and how.

What exactly are you thinking is userspace's responsibility?

> > + *
> > + *   - Everything else is always allocated and mapped as write-back, with 
> > the
> > + * guarantee that everything is also coherent with the GPU.
>
> Haven't been following this so just a question on this one - it is not
> considered interesting to offer non-coherent modes, or even write
> combine, with system memory buffers, for a specific reason?

We only care about non-coherent modes on integrated little-core.
There, we share memory between CPU and GPU but snooping from the GPU
is optional.  Depending on access patterns, we might want WB with GPU
snooping or we might want WC.  I don't think we care about WC for SMEM
allocations on discrete.  For that matter, I'm not sure you can
actually shut snooping off when going across a "real" PCIe bus.  At
least not with DG1.

--Jason

> Regards,
>
> Tvrtko
>
> > + *
> > + * Note that this is likely to change in the future again, where we might 
> > need
> > + * more flexibility on future devices, so making this all explicit as part 
> > of a
> > + * new _i915_gem_create_ext extension is probable.
> >*/
> >   struct drm_i915_gem_set_domain {
> >   /** @handle: Handle for the object. */
> >


Re: [PATCH v3 0/8] Support DEVICE_GENERIC memory in migrate_vma_*

2021-07-16 Thread Theodore Y. Ts'o
On Wed, Jun 23, 2021 at 05:49:55PM -0400, Felix Kuehling wrote:
> 
> I can think of two ways to test the changes for MEMORY_DEVICE_GENERIC in
> this patch series in a way that is reproducible without special hardware and
> firmware:
> 
> For the reference counting changes we could use the dax driver with hmem and
> use efi_fake_mem on the kernel command line to create some DEVICE_GENERIC
> pages. I'm open to suggestions for good user mode tests to exercise dax
> functionality on this type of memory.

Sorry for the thread necromancy, but now that the merge window is
past

Today I test ext4's dax support, without having any $$$ DAX hardware,
by using the kernel command line "memmap=4G!9G:memmap=9G!14G" which
reserves memory so that creates two pmem device and then I run
xfstests with DAX enabled using qemu or using a Google Compute Engine
VM, using TEST_DEV=/dev/pmem0 and SCRATCH_DEV=/dev/pmem1.

If you can give me a recipe for what kernel configs I should enable,
and what magic kernel command line arguments to use, then I'd be able
to test your patch set with ext4.

Cheers,

- Ted


Re: [PATCH 4/4] drm/i915/uapi: reject set_domain for discrete

2021-07-16 Thread Tvrtko Ursulin



On 15/07/2021 11:15, Matthew Auld wrote:

The CPU domain should be static for discrete, and on DG1 we don't need
any flushing since everything is already coherent, so really all this
does is an object wait, for which we have an ioctl. Longer term the
desired caching should be an immutable creation time property for the
BO, which can be set with something like gem_create_ext.

One other user is iris + userptr, which uses the set_domain to probe all
the pages to check if the GUP succeeds, however we now have a PROBE
flag for this purpose.

v2: add some more kernel doc, also add the implicit rules with caching

Suggested-by: Daniel Vetter 
Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Jason Ekstrand 
Cc: Daniel Vetter 
Cc: Ramalingam C 
Reviewed-by: Ramalingam C 
---
  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
  include/uapi/drm/i915_drm.h| 19 +++
  2 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 43004bef55cb..b684a62bf3b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
u32 write_domain = args->write_domain;
int err;
  
+	if (IS_DGFX(to_i915(dev)))

+   return -ENODEV;
+
/* Only handle setting domains to types used by the CPU. */
if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e4112bf4d38..04ce310e7ee6 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -901,6 +901,25 @@ struct drm_i915_gem_mmap_offset {
   *- I915_GEM_DOMAIN_GTT: Mappable aperture domain
   *
   * All other domains are rejected.
+ *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single _i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see 
_i915_gem_create_ext_memory_regions)
+ *   contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *   mapped as write-combined only.


A note about write-combine buffer? I guess saying it is userspace 
responsibility to do it and how.



+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ *   guarantee that everything is also coherent with the GPU.


Haven't been following this so just a question on this one - it is not 
considered interesting to offer non-coherent modes, or even write 
combine, with system memory buffers, for a specific reason?


Regards,

Tvrtko


+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new _i915_gem_create_ext extension is probable.
   */
  struct drm_i915_gem_set_domain {
/** @handle: Handle for the object. */



Re: [PATCH 3/4] drm/i915/userptr: Probe existence of backing struct pages upon creation

2021-07-16 Thread Daniel Vetter
On Thu, Jul 15, 2021 at 8:21 PM Kenneth Graunke  wrote:
>
> On Thursday, July 15, 2021 4:27:44 AM PDT Tvrtko Ursulin wrote:
> >
> > On 15/07/2021 12:07, Daniel Vetter wrote:
> > > On Thu, Jul 15, 2021 at 11:33:10AM +0100, Tvrtko Ursulin wrote:
> > >>
> > >> On 15/07/2021 11:15, Matthew Auld wrote:
> > >>> From: Chris Wilson 
> > >>>
> > >>> Jason Ekstrand requested a more efficient method than userptr+set-domain
> > >>> to determine if the userptr object was backed by a complete set of pages
> > >>> upon creation. To be more efficient than simply populating the userptr
> > >>> using get_user_pages() (as done by the call to set-domain or execbuf),
> > >>> we can walk the tree of vm_area_struct and check for gaps or vma not
> > >>> backed by struct page (VM_PFNMAP). The question is how to handle
> > >>> VM_MIXEDMAP which may be either struct page or pfn backed...
> > >>>
> > >>> With discrete are going to drop support for set_domain(), so offering a
> > >>> way to probe the pages, without having to resort to dummy batches has
> > >>> been requested.
> > >>>
> > >>> v2:
> > >>> - add new query param for the PROPBE flag, so userspace can easily
> > >>> check if the kernel supports it(Jason).
> > >>> - use mmap_read_{lock, unlock}.
> > >>> - add some kernel-doc.
> > >>
> > >> 1)
> > >>
> > >> I think probing is too weak to be offered as part of the uapi. What 
> > >> probes
> > >> successfully at create time might not be there anymore at usage time. So 
> > >> if
> > >> the pointer is not trusted at one point, why should it be at a later 
> > >> stage?
> > >>
> > >> Only thing which works for me is populate (so get_pages) at create time. 
> > >> But
> > >> again with no guarantees they are still there at use time clearly
> > >> documented.
> > >
> > > Populate is exactly as racy as probe. We don't support pinned userptr
> > > anymore.
> >
> > Yes, wrote so myself - "..again with no guarantees they are still there
> > at use time..".
> >
> > Perhaps I don't understand what problem is probe supposed to solve. It
> > doesn't deal 1:1 with set_domain removal since that one actually did
> > get_pages so that would be populate. But fact remains regardless that if
> > userspace is given a pointer it doesn't trust, _and_ wants the check it
> > for this reason or that, then probe solves nothing. Unless there is
> > actually at minimum some protocol to reply to whoever sent the pointer
> > like "not that pointer please".
>
> That's exactly the point.  GL_AMD_pinned_memory requires us the OpenGL
> implementation to return an error for "not that pointer, please", at the
> time when said pointer is supplied - not at first use.
>
> Sure, there can be reasons why it might seem fine up front, and not work
> later.  But an early check of "just no, you're doing it totally wrong"
> at the right moment can be helpful for application developers.  While it
> shouldn't really happen, if it ever did, it would be a lot more obvious
> to debug than "much later on, when something randomly flushed the GPU
> commands we were building, something went wrong, and we don't know why."

Also, that extension doesn't make guarantees about importing nasty
userspace memory where some non-trusted entity could e.g. truncate()
the file and render all pages invalid, even if you're holding a
reference to it still.

It's purely to check "hey I got this random pointer here from
somewhere, I trust it, can you use it assuming I will not change
anything with it?". Which is exactly what probe solves, and pulling
in the pages is kinda overkill.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 3/4] drm/i915/userptr: Probe existence of backing struct pages upon creation

2021-07-16 Thread Tvrtko Ursulin



On 15/07/2021 11:15, Matthew Auld wrote:

From: Chris Wilson 

Jason Ekstrand requested a more efficient method than userptr+set-domain
to determine if the userptr object was backed by a complete set of pages
upon creation. To be more efficient than simply populating the userptr
using get_user_pages() (as done by the call to set-domain or execbuf),
we can walk the tree of vm_area_struct and check for gaps or vma not
backed by struct page (VM_PFNMAP). The question is how to handle
VM_MIXEDMAP which may be either struct page or pfn backed...

With discrete are going to drop support for set_domain(), so offering a
way to probe the pages, without having to resort to dummy batches has
been requested.

v2:
- add new query param for the PROPBE flag, so userspace can easily


PROBE


   check if the kernel supports it(Jason).
- use mmap_read_{lock, unlock}.
- add some kernel-doc.

Testcase: igt/gem_userptr_blits/probe
Signed-off-by: Chris Wilson 
Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Jason Ekstrand 
Cc: Daniel Vetter 
Cc: Ramalingam C 
---
  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 40 -
  drivers/gpu/drm/i915/i915_getparam.c|  3 ++
  include/uapi/drm/i915_drm.h | 18 ++
  3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 56edfeff8c02..fd6880328596 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -422,6 +422,33 @@ static const struct drm_i915_gem_object_ops 
i915_gem_userptr_ops = {
  
  #endif
  
+static int

+probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
+{
+   const unsigned long end = addr + len;
+   struct vm_area_struct *vma;
+   int ret = -EFAULT;
+
+   mmap_read_lock(mm);
+   for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+   if (vma->vm_start > addr)
+   break;
+
+   if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+   break;
+
+   if (vma->vm_end >= end) {
+   ret = 0;
+   break;
+   }
+
+   addr = vma->vm_end;
+   }
+   mmap_read_unlock(mm);


Logic here looks good to me.


+
+   return ret;
+}
+
  /*
   * Creates a new mm object that wraps some normal memory from the process
   * context - user memory.
@@ -477,7 +504,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
}
  
  	if (args->flags & ~(I915_USERPTR_READ_ONLY |

-   I915_USERPTR_UNSYNCHRONIZED))
+   I915_USERPTR_UNSYNCHRONIZED |
+   I915_USERPTR_PROBE))
return -EINVAL;
  
  	if (i915_gem_object_size_2big(args->user_size))

@@ -504,6 +532,16 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENODEV;
}
  
+	if (args->flags & I915_USERPTR_PROBE) {

+   /*
+* Check that the range pointed to represents real struct
+* pages and not iomappings (at this moment in time!)
+*/
+   ret = probe_range(current->mm, args->user_ptr, args->user_size);
+   if (ret)
+   return ret;
+   }
+
  #ifdef CONFIG_MMU_NOTIFIER
obj = i915_gem_object_alloc();
if (obj == NULL)
diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index 24e18219eb50..d6d2e1a10d14 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -163,6 +163,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = i915_perf_ioctl_version();
break;
+   case I915_PARAM_HAS_USERPTR_PROBE:
+   value = true;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e20eeeca7a1c..2e4112bf4d38 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -674,6 +674,9 @@ typedef struct drm_i915_irq_wait {
   */
  #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
  
+/* Query if the kernel supports the I915_USERPTR_PROBE flag. */

+#define I915_PARAM_HAS_USERPTR_PROBE 56
+
  /* Must be kept compact -- no holes and well documented */
  
  typedef struct drm_i915_getparam {

@@ -2178,12 +2181,27 @@ struct drm_i915_gem_userptr {
 * through the GTT. If the HW can't support readonly access, an error is
 * returned.
 *
+* I915_USERPTR_PROBE:
+*
+* Probe the provided @user_ptr range and validate that the @user_ptr is
+* indeed pointing to 

[PATCH 6/7] drm/i915/gem: Correct the locking and pin pattern for dma-buf (v6)

2021-07-16 Thread Jason Ekstrand
From: Thomas Hellström 

If our exported dma-bufs are imported by another instance of our driver,
that instance will typically have the imported dma-bufs locked during
dma_buf_map_attachment(). But the exporter also locks the same reservation
object in the map_dma_buf() callback, which leads to recursive locking.

So taking the lock inside _pin_pages_unlocked() is incorrect.

Additionally, the current pinning code path is contrary to the defined
way that pinning should occur.

Remove the explicit pin/unpin from the map/umap functions and move them
to the attach/detach allowing correct locking to occur, and to match
the static dma-buf drm_prime pattern.

Add a live selftest to exercise both dynamic and non-dynamic
exports.

v2:
- Extend the selftest with a fake dynamic importer.
- Provide real pin and unpin callbacks to not abuse the interface.
v3: (ruhl)
- Remove the dynamic export support and move the pinning into the
  attach/detach path.
v4: (ruhl)
- Put pages does not need to assert on the dma-resv
v5: (jason)
- Lock around dma_buf_unmap_attachment() when emulating a dynamic
  importer in the subtests.
- Use pin_pages_unlocked
v6: (jason)
- Use dma_buf_attach instead of dma_buf_attach_dynamic in the selftests

Reported-by: Michael J. Ruhl 
Signed-off-by: Thomas Hellström 
Signed-off-by: Michael J. Ruhl 
Signed-off-by: Jason Ekstrand 
Reviewed-by: Jason Ekstrand 
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  43 ++--
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 103 +-
 2 files changed, 132 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 616c3a2f1baf0..9a655f69a0671 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -12,6 +12,8 @@
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 
+I915_SELFTEST_DECLARE(static bool force_different_devices;)
+
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
return to_intel_bo(buf->priv);
@@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
dma_buf_attachment *attachme
struct scatterlist *src, *dst;
int ret, i;
 
-   ret = i915_gem_object_pin_pages_unlocked(obj);
-   if (ret)
-   goto err;
-
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
ret = -ENOMEM;
-   goto err_unpin_pages;
+   goto err;
}
 
ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
@@ -58,8 +56,6 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
dma_buf_attachment *attachme
sg_free_table(st);
 err_free:
kfree(st);
-err_unpin_pages:
-   i915_gem_object_unpin_pages(obj);
 err:
return ERR_PTR(ret);
 }
@@ -68,13 +64,9 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment 
*attachment,
   struct sg_table *sg,
   enum dma_data_direction dir)
 {
-   struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-
dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(sg);
kfree(sg);
-
-   i915_gem_object_unpin_pages(obj);
 }
 
 static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map 
*map)
@@ -168,7 +160,31 @@ static int i915_gem_end_cpu_access(struct dma_buf 
*dma_buf, enum dma_data_direct
return err;
 }
 
+/**
+ * i915_gem_dmabuf_attach - Do any extra attach work necessary
+ * @dmabuf: imported dma-buf
+ * @attach: new attach to do work on
+ *
+ */
+static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+
+   return i915_gem_object_pin_pages_unlocked(obj);
+}
+
+static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
+  struct dma_buf_attachment *attach)
+{
+   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+
+   i915_gem_object_unpin_pages(obj);
+}
+
 static const struct dma_buf_ops i915_dmabuf_ops =  {
+   .attach = i915_gem_dmabuf_attach,
+   .detach = i915_gem_dmabuf_detach,
.map_dma_buf = i915_gem_map_dma_buf,
.unmap_dma_buf = i915_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
@@ -204,6 +220,8 @@ static int i915_gem_object_get_pages_dmabuf(struct 
drm_i915_gem_object *obj)
struct sg_table *pages;
unsigned int sg_page_sizes;
 
+   assert_object_held(obj);
+
pages = dma_buf_map_attachment(obj->base.import_attach,
   DMA_BIDIRECTIONAL);
if (IS_ERR(pages))
@@ -241,7 +259,8 @@ struct drm_gem_object *i915_gem_prime_import(struct 
drm_device *dev,
if (dma_buf->ops 

[PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Jason Ekstrand
Whenever we had a user object (n_placements > 0), we were ignoring
obj->mm.region and always putting obj->placements[0] as the requested
region.  For LMEM+SMEM objects, this was causing them to get shoved into
LMEM on every i915_ttm_get_pages() even when SMEM was requested by, say,
i915_gem_object_migrate().

Signed-off-by: Jason Ekstrand 
Cc: Thomas Hellström 
Cc: Matthew Auld 
Cc: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 6589411396d3f..8eeb73c7c401c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
drm_i915_gem_object *obj,
unsigned int i;
 
placement->num_placement = 1;
-   i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
-  obj->mm.region, requested, flags);
+   i915_ttm_place_from_region(obj->mm.region, requested, flags);
 
/* Cache this on object? */
placement->num_busy_placement = num_allowed;
-- 
2.31.1



[PATCH 4/7] drm/i915/gem: Unify user object creation (v2)

2021-07-16 Thread Jason Ekstrand
Instead of hand-rolling the same three calls in each function, pull them
into an i915_gem_object_create_user helper.  Apart from re-ordering of
the placements array ENOMEM check, there should be no functional change.

v2 (Matthew Auld):
 - Add the call to i915_gem_flush_free_objects() from
   i915_gem_dumb_create() in a separate patch
 - Move i915_gem_object_alloc() below the simple error checks

Signed-off-by: Jason Ekstrand 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 108 -
 1 file changed, 43 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 1b370914587c0..039e4f3b39c79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,13 +11,14 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
-static u32 object_max_page_size(struct drm_i915_gem_object *obj)
+static u32 object_max_page_size(struct intel_memory_region **placements,
+   unsigned int n_placements)
 {
u32 max_page_size = 0;
int i;
 
-   for (i = 0; i < obj->mm.n_placements; i++) {
-   struct intel_memory_region *mr = obj->mm.placements[i];
+   for (i = 0; i < n_placements; i++) {
+   struct intel_memory_region *mr = placements[i];
 
GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
max_page_size = max_t(u32, max_page_size, mr->min_page_size);
@@ -81,22 +82,35 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj,
return 0;
 }
 
-static int
-i915_gem_setup(struct drm_i915_gem_object *obj, u64 size)
+static struct drm_i915_gem_object *
+i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+   struct intel_memory_region **placements,
+   unsigned int n_placements)
 {
-   struct intel_memory_region *mr = obj->mm.placements[0];
+   struct intel_memory_region *mr = placements[0];
+   struct drm_i915_gem_object *obj;
unsigned int flags;
int ret;
 
-   size = round_up(size, object_max_page_size(obj));
+   i915_gem_flush_free_objects(i915);
+
+   size = round_up(size, object_max_page_size(placements, n_placements));
if (size == 0)
-   return -EINVAL;
+   return ERR_PTR(-EINVAL);
 
/* For most of the ABI (e.g. mmap) we think in system pages */
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
if (i915_gem_object_size_2big(size))
-   return -E2BIG;
+   return ERR_PTR(-E2BIG);
+
+   obj = i915_gem_object_alloc();
+   if (!obj)
+   return ERR_PTR(-ENOMEM);
+
+   ret = object_set_placements(obj, placements, n_placements);
+   if (ret)
+   goto object_free;
 
/*
 * I915_BO_ALLOC_USER will make sure the object is cleared before
@@ -106,12 +120,18 @@ i915_gem_setup(struct drm_i915_gem_object *obj, u64 size)
 
ret = mr->ops->init_object(mr, obj, size, 0, flags);
if (ret)
-   return ret;
+   goto object_free;
 
GEM_BUG_ON(size != obj->base.size);
 
trace_i915_gem_object_create(obj);
-   return 0;
+   return obj;
+
+object_free:
+   if (obj->mm.n_placements > 1)
+   kfree(obj->mm.placements);
+   i915_gem_object_free(obj);
+   return ERR_PTR(ret);
 }
 
 int
@@ -124,7 +144,6 @@ i915_gem_dumb_create(struct drm_file *file,
enum intel_memory_type mem_type;
int cpp = DIV_ROUND_UP(args->bpp, 8);
u32 format;
-   int ret;
 
switch (cpp) {
case 1:
@@ -151,32 +170,19 @@ i915_gem_dumb_create(struct drm_file *file,
if (args->pitch < args->width)
return -EINVAL;
 
-   i915_gem_flush_free_objects(i915);
-
args->size = mul_u32_u32(args->pitch, args->height);
 
mem_type = INTEL_MEMORY_SYSTEM;
if (HAS_LMEM(to_i915(dev)))
mem_type = INTEL_MEMORY_LOCAL;
 
-   obj = i915_gem_object_alloc();
-   if (!obj)
-   return -ENOMEM;
-
mr = intel_memory_region_by_type(to_i915(dev), mem_type);
-   ret = object_set_placements(obj, , 1);
-   if (ret)
-   goto object_free;
 
-   ret = i915_gem_setup(obj, args->size);
-   if (ret)
-   goto object_free;
+   obj = i915_gem_object_create_user(to_i915(dev), args->size, , 1);
+   if (IS_ERR(obj))
+   return PTR_ERR(obj);
 
return i915_gem_publish(obj, file, >size, >handle);
-
-object_free:
-   i915_gem_object_free(obj);
-   return ret;
 }
 
 /**
@@ -193,28 +199,14 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_create *args = data;
struct drm_i915_gem_object *obj;
struct intel_memory_region *mr;
-   int ret;
-
-   

[PATCH 7/7] drm/i915/gem: Migrate to system at dma-buf attach time (v6)

2021-07-16 Thread Jason Ekstrand
From: Thomas Hellström 

Until we support p2p dma or as a complement to that, migrate data
to system memory at dma-buf attach time if possible.

v2:
- Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
  selftest to migrate if we are LMEM capable.
v3:
- Migrate also in the pin() callback.
v4:
- Migrate in attach
v5: (jason)
- Lock around the migration
v6: (jason)
- Move the can_migrate check outside the lock
- Rework the selftests to test more migration conditions.  In
  particular, SMEM, LMEM, and LMEM+SMEM are all checked.

Signed-off-by: Thomas Hellström 
Signed-off-by: Michael J. Ruhl 
Reported-by: kernel test robot 
Signed-off-by: Jason Ekstrand 
Reviewed-by: Jason Ekstrand 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 23 -
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 89 ++-
 4 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 039e4f3b39c79..41c4cd3e1ea01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -82,7 +82,7 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj,
return 0;
 }
 
-static struct drm_i915_gem_object *
+struct drm_i915_gem_object *
 i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
struct intel_memory_region **placements,
unsigned int n_placements)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 9a655f69a0671..5d438b95826b9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -170,8 +170,29 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
  struct dma_buf_attachment *attach)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+   struct i915_gem_ww_ctx ww;
+   int err;
+
+   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
+   return -EOPNOTSUPP;
+
+   for_i915_gem_ww(, err, true) {
+   err = i915_gem_object_lock(obj, );
+   if (err)
+   continue;
+
+   err = i915_gem_object_migrate(obj, , INTEL_REGION_SMEM);
+   if (err)
+   continue;
 
-   return i915_gem_object_pin_pages_unlocked(obj);
+   err = i915_gem_object_wait_migration(obj, 0);
+   if (err)
+   continue;
+
+   err = i915_gem_object_pin_pages(obj);
+   }
+
+   return err;
 }
 
 static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 8be4fadeee487..fbae53bd46384 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -61,6 +61,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
 struct drm_i915_gem_object *
 i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
   const void *data, resource_size_t size);
+struct drm_i915_gem_object *
+i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+   struct intel_memory_region **placements,
+   unsigned int n_placements);
 
 extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 4451bbb4917e4..7b7647e7e220a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -85,9 +85,62 @@ static int igt_dmabuf_import_self(void *arg)
return err;
 }
 
-static int igt_dmabuf_import_same_driver(void *arg)
+static int igt_dmabuf_import_same_driver_lmem(void *arg)
 {
struct drm_i915_private *i915 = arg;
+   struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM];
+   struct drm_i915_gem_object *obj;
+   struct drm_gem_object *import;
+   struct dma_buf *dmabuf;
+   int err;
+
+   if (!i915->mm.regions[INTEL_REGION_LMEM])
+   return 0;
+
+   force_different_devices = true;
+
+   obj = i915_gem_object_create_user(i915, PAGE_SIZE, , 1);
+   if (IS_ERR(obj)) {
+   pr_err("i915_gem_object_create_user failed with err=%d\n",
+  (int)PTR_ERR(dmabuf));
+   err = PTR_ERR(obj);
+   goto out_ret;
+   }
+
+   dmabuf = i915_gem_prime_export(>base, 0);
+   if (IS_ERR(dmabuf)) {
+   pr_err("i915_gem_prime_export failed with err=%d\n",
+  (int)PTR_ERR(dmabuf));
+   

[PATCH 2/7] drm/i915/gem: Refactor placement setup for i915_gem_object_create* (v2)

2021-07-16 Thread Jason Ekstrand
Since we don't allow changing the set of regions after creation, we can
make ext_set_placements() build up the region set directly in the
create_ext and assign it to the object later.  This is similar to what
we did for contexts with the proto-context only simpler because there's
no funny object shuffling.  This will be used in the next patch to allow
us to de-duplicate a bunch of code.  Also, since we know the maximum
number of regions up-front, we can use a fixed-size temporary array for
the regions.  This simplifies memory management a bit for this new
delayed approach.

v2 (Matthew Auld):
 - Get rid of MAX_N_PLACEMENTS
 - Drop kfree(placements) from set_placements()

Signed-off-by: Jason Ekstrand 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 81 --
 1 file changed, 45 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 51f92e4b1a69d..5766749a449c0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -27,10 +27,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object 
*obj)
return max_page_size;
 }
 
-static void object_set_placements(struct drm_i915_gem_object *obj,
- struct intel_memory_region **placements,
- unsigned int n_placements)
+static int object_set_placements(struct drm_i915_gem_object *obj,
+struct intel_memory_region **placements,
+unsigned int n_placements)
 {
+   struct intel_memory_region **arr;
+   unsigned int i;
+
GEM_BUG_ON(!n_placements);
 
/*
@@ -44,9 +47,20 @@ static void object_set_placements(struct drm_i915_gem_object 
*obj,
obj->mm.placements = >mm.regions[mr->id];
obj->mm.n_placements = 1;
} else {
-   obj->mm.placements = placements;
+   arr = kmalloc_array(n_placements,
+   sizeof(struct intel_memory_region *),
+   GFP_KERNEL);
+   if (!arr)
+   return -ENOMEM;
+
+   for (i = 0; i < n_placements; i++)
+   arr[i] = placements[i];
+
+   obj->mm.placements = arr;
obj->mm.n_placements = n_placements;
}
+
+   return 0;
 }
 
 static int i915_gem_publish(struct drm_i915_gem_object *obj,
@@ -148,7 +162,9 @@ i915_gem_dumb_create(struct drm_file *file,
return -ENOMEM;
 
mr = intel_memory_region_by_type(to_i915(dev), mem_type);
-   object_set_placements(obj, , 1);
+   ret = object_set_placements(obj, , 1);
+   if (ret)
+   goto object_free;
 
ret = i915_gem_setup(obj, args->size);
if (ret)
@@ -184,7 +200,9 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
return -ENOMEM;
 
mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
-   object_set_placements(obj, , 1);
+   ret = object_set_placements(obj, , 1);
+   if (ret)
+   goto object_free;
 
ret = i915_gem_setup(obj, args->size);
if (ret)
@@ -199,7 +217,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 
 struct create_ext {
struct drm_i915_private *i915;
-   struct drm_i915_gem_object *vanilla_object;
+   struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
+   unsigned int n_placements;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -230,8 +249,7 @@ static int set_placements(struct 
drm_i915_gem_create_ext_memory_regions *args,
struct drm_i915_private *i915 = ext_data->i915;
struct drm_i915_gem_memory_class_instance __user *uregions =
u64_to_user_ptr(args->regions);
-   struct drm_i915_gem_object *obj = ext_data->vanilla_object;
-   struct intel_memory_region **placements;
+   struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
u32 mask;
int i, ret = 0;
 
@@ -245,6 +263,8 @@ static int set_placements(struct 
drm_i915_gem_create_ext_memory_regions *args,
ret = -EINVAL;
}
 
+   BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements));
+   BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != 
ARRAY_SIZE(placements));
if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) {
drm_dbg(>drm, "num_regions is too large\n");
ret = -EINVAL;
@@ -253,21 +273,13 @@ static int set_placements(struct 
drm_i915_gem_create_ext_memory_regions *args,
if (ret)
return ret;
 
-   placements = kmalloc_array(args->num_regions,
-  sizeof(struct intel_memory_region *),
-  GFP_KERNEL);
-   if (!placements)
-   return -ENOMEM;
-

[PATCH 1/7] drm/i915/gem: Check object_can_migrate from object_migrate

2021-07-16 Thread Jason Ekstrand
We don't roll them together entirely because there are still a couple
cases where we want a separate can_migrate check.  For instance, the
display code checks that you can migrate a buffer to LMEM before it
accepts it in fb_create.  The dma-buf import code also uses it to do an
early check and return a different error code if someone tries to attach
a LMEM-only dma-buf to another driver.

However, no one actually wants to call object_migrate when can_migrate
has failed.  The stated intention is for self-tests but none of those
actually take advantage of this unsafe migration.

Signed-off-by: Jason Ekstrand 
Cc: Daniel Vetter 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 13 ++---
 .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 15 ---
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 9da7b288b7ede..f2244ae09a613 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -584,12 +584,6 @@ bool i915_gem_object_can_migrate(struct 
drm_i915_gem_object *obj,
  * completed yet, and to accomplish that, i915_gem_object_wait_migration()
  * must be called.
  *
- * This function is a bit more permissive than i915_gem_object_can_migrate()
- * to allow for migrating objects where the caller knows exactly what is
- * happening. For example within selftests. More specifically this
- * function allows migrating I915_BO_ALLOC_USER objects to regions
- * that are not in the list of allowable regions.
- *
  * Note: the @ww parameter is not used yet, but included to make sure
  * callers put some effort into obtaining a valid ww ctx if one is
  * available.
@@ -616,11 +610,8 @@ int i915_gem_object_migrate(struct drm_i915_gem_object 
*obj,
if (obj->mm.region == mr)
return 0;
 
-   if (!i915_gem_object_evictable(obj))
-   return -EBUSY;
-
-   if (!obj->ops->migrate)
-   return -EOPNOTSUPP;
+   if (!i915_gem_object_can_migrate(obj, id))
+   return -EINVAL;
 
return obj->ops->migrate(obj, mr);
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 0b7144d2991ca..28a700f08b49a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -61,11 +61,6 @@ static int igt_create_migrate(struct intel_gt *gt, enum 
intel_region_id src,
if (err)
continue;
 
-   if (!i915_gem_object_can_migrate(obj, dst)) {
-   err = -EINVAL;
-   continue;
-   }
-
err = i915_gem_object_migrate(obj, , dst);
if (err)
continue;
@@ -114,11 +109,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx 
*ww,
return err;
 
if (i915_gem_object_is_lmem(obj)) {
-   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
-   pr_err("object can't migrate to smem.\n");
-   return -EINVAL;
-   }
-
err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
if (err) {
pr_err("Object failed migration to smem\n");
@@ -137,11 +127,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx 
*ww,
}
 
} else {
-   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) {
-   pr_err("object can't migrate to lmem.\n");
-   return -EINVAL;
-   }
-
err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM);
if (err) {
pr_err("Object failed migration to lmem\n");
-- 
2.31.1



[PATCH 3/7] drm/i915/gem: Call i915_gem_flush_free_objects() in i915_gem_dumb_create()

2021-07-16 Thread Jason Ekstrand
This doesn't really fix anything serious since the chances of a client
creating and destroying a mass of dumb BOs are pretty low.  However,
i915_gem_flush_free_objects() is called by the other two create IOCTLs
to garbage collect old objects.  Call it here too for consistency.

Signed-off-by: Jason Ekstrand 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 5766749a449c0..1b370914587c0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -151,6 +151,8 @@ i915_gem_dumb_create(struct drm_file *file,
if (args->pitch < args->width)
return -EINVAL;
 
+   i915_gem_flush_free_objects(i915);
+
args->size = mul_u32_u32(args->pitch, args->height);
 
mem_type = INTEL_MEMORY_SYSTEM;
-- 
2.31.1



[PATCH 0/7] drm/i915: Migrate memory to SMEM when imported cross-device (v7)

2021-07-16 Thread Jason Ekstrand
This patch series fixes an issue with discrete graphics on Intel where we
allowed dma-buf import while leaving the object in local memory.  This
breaks down pretty badly if the import happened on a different physical
device.

v7:
 - Drop "drm/i915/gem/ttm: Place new BOs in the requested region"
 - Add a new "drm/i915/gem: Call i915_gem_flush_free_objects() in 
i915_gem_dumb_create()"
 - Misc. review feedback from Matthew Auld

Jason Ekstrand (5):
  drm/i915/gem: Check object_can_migrate from object_migrate
  drm/i915/gem: Refactor placement setup for i915_gem_object_create*
(v2)
  drm/i915/gem: Call i915_gem_flush_free_objects() in
i915_gem_dumb_create()
  drm/i915/gem: Unify user object creation (v2)
  drm/i915/gem/ttm: Respect the objection region in placement_from_obj

Thomas Hellström (2):
  drm/i915/gem: Correct the locking and pin pattern for dma-buf (v6)
  drm/i915/gem: Migrate to system at dma-buf attach time (v6)

 drivers/gpu/drm/i915/gem/i915_gem_create.c| 165 
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  64 --
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  13 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|   4 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 184 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  15 --
 7 files changed, 318 insertions(+), 130 deletions(-)

-- 
2.31.1



Re: [PATCH 5/7] drm/i915/gem/ttm: Respect the objection region in placement_from_obj

2021-07-16 Thread Jason Ekstrand
On Fri, Jul 16, 2021 at 8:54 AM Matthew Auld
 wrote:
>
> On Thu, 15 Jul 2021 at 23:39, Jason Ekstrand  wrote:
> >
> > Whenever we had a user object (n_placements > 0), we were ignoring
> > obj->mm.region and always putting obj->placements[0] as the requested
> > region.  For LMEM+SMEM objects, this was causing them to get shoved into
> > LMEM on every i915_ttm_get_pages() even when SMEM was requested by, say,
> > i915_gem_object_migrate().
>
> i915_ttm_migrate calls i915_ttm_place_from_region() directly with the
> requested region, so there shouldn't be an issue with migration right?
> Do you have some more details?

With i915_ttm_migrate directly, no.  But, in the last patch in the
series, we're trying to migrate LMEM+SMEM buffers into SMEM on
attach() and pin it there.  This blows up in a very unexpected (IMO)
way.  The flow goes something like this:

 - Client attempts a dma-buf import from another device
 - In attach() we call i915_gem_object_migrate() which calls
i915_ttm_migrate() which migrates as requested.
 - Once the migration is complete, we call i915_gem_object_pin_pages()
which calls i915_ttm_get_pages() which depends on
i915_ttm_placement_from_obj() and so migrates it right back to LMEM.

Maybe the problem here is actually that our TTM code isn't respecting
obj->mm.pages_pin_count?

In case you can't tell, I really have no clue what I'm doing here.
I'm really stumbling around in the dark finding things that make my
bug go away.  I'm happy for the feedback.

--Jason

>
> >
> > Signed-off-by: Jason Ekstrand 
> > Cc: Thomas Hellström 
> > Cc: Matthew Auld 
> > Cc: Maarten Lankhorst 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > index d30f274c329c7..5985e994d56cf 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > @@ -150,8 +150,7 @@ i915_ttm_placement_from_obj(const struct 
> > drm_i915_gem_object *obj,
> > unsigned int i;
> >
> > placement->num_placement = 1;
> > -   i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
> > -  obj->mm.region, requested, flags);
> > +   i915_ttm_place_from_region(obj->mm.region, requested, flags);
> >
> > /* Cache this on object? */
> > placement->num_busy_placement = num_allowed;
> > --
> > 2.31.1
> >


[PATCH 5/7] drm/gm12u320: Use framebuffer dma-buf helpers

2021-07-16 Thread Thomas Zimmermann
Replace dma_buf_begin_cpu_access() with drm_gem_fb_begin_cpu_access();
same for _end_cpu_access(). Remove some boiler-plate code. No functional
changes.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/tiny/gm12u320.c | 19 +--
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c
index a233c86d428b..cf7287fccd72 100644
--- a/drivers/gpu/drm/tiny/gm12u320.c
+++ b/drivers/gpu/drm/tiny/gm12u320.c
@@ -3,7 +3,6 @@
  * Copyright 2019 Hans de Goede 
  */
 
-#include 
 #include 
 #include 
 
@@ -268,13 +267,10 @@ static void gm12u320_copy_fb_to_blocks(struct 
gm12u320_device *gm12u320)
y2 = gm12u320->fb_update.rect.y2;
vaddr = gm12u320->fb_update.src_map.vaddr; /* TODO: Use mapping 
abstraction properly */
 
-   if (fb->obj[0]->import_attach) {
-   ret = dma_buf_begin_cpu_access(
-   fb->obj[0]->import_attach->dmabuf, DMA_FROM_DEVICE);
-   if (ret) {
-   GM12U320_ERR("dma_buf_begin_cpu_access err: %d\n", ret);
-   goto put_fb;
-   }
+   ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
+   if (ret) {
+   GM12U320_ERR("drm_gem_fb_begin_cpu_access err: %d\n", ret);
+   goto put_fb;
}
 
src = vaddr + y1 * fb->pitches[0] + x1 * 4;
@@ -311,12 +307,7 @@ static void gm12u320_copy_fb_to_blocks(struct 
gm12u320_device *gm12u320)
src += fb->pitches[0];
}
 
-   if (fb->obj[0]->import_attach) {
-   ret = dma_buf_end_cpu_access(fb->obj[0]->import_attach->dmabuf,
-DMA_FROM_DEVICE);
-   if (ret)
-   GM12U320_ERR("dma_buf_end_cpu_access err: %d\n", ret);
-   }
+   drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 put_fb:
drm_framebuffer_put(fb);
gm12u320->fb_update.fb = NULL;
-- 
2.32.0



[PATCH 6/7] drm/repaper: Use framebuffer dma-buf helpers

2021-07-16 Thread Thomas Zimmermann
Replace dma_buf_begin_cpu_access() with drm_gem_fb_begin_cpu_access();
same for _end_cpu_access(). Remove some boiler-plate code. No functional
changes.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/tiny/repaper.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index 007d9d59f01c..4d07b21a16e6 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -14,7 +14,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -532,7 +531,6 @@ static void repaper_gray8_to_mono_reversed(u8 *buf, u32 
width, u32 height)
 static int repaper_fb_dirty(struct drm_framebuffer *fb)
 {
struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-   struct dma_buf_attachment *import_attach = cma_obj->base.import_attach;
struct repaper_epd *epd = drm_to_epd(fb->dev);
struct drm_rect clip;
int idx, ret = 0;
@@ -558,21 +556,13 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb)
goto out_exit;
}
 
-   if (import_attach) {
-   ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
-  DMA_FROM_DEVICE);
-   if (ret)
-   goto out_free;
-   }
+   ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
+   if (ret)
+   goto out_free;
 
drm_fb_xrgb_to_gray8(buf, cma_obj->vaddr, fb, );
 
-   if (import_attach) {
-   ret = dma_buf_end_cpu_access(import_attach->dmabuf,
-DMA_FROM_DEVICE);
-   if (ret)
-   goto out_free;
-   }
+   drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
repaper_gray8_to_mono_reversed(buf, fb->width, fb->height);
 
-- 
2.32.0



[PATCH 3/7] drm/mipi-dbi: Use framebuffer dma-buf helpers

2021-07-16 Thread Thomas Zimmermann
Replace dma_buf_begin_cpu_access() with drm_gem_fb_begin_cpu_access();
same for _end_cpu_access(). Remove some boiler-plate code. No functional
changes.

There's one left-over reference to the imported attachment that we
keep. GEM BOs with imported attachment are considered uncached and
enables special handling within the drm_fb_swab().

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/drm_mipi_dbi.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index 10b4e59384ae..71b646c4131f 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -7,7 +7,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -202,21 +201,17 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer 
*fb,
 {
struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0);
struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(gem);
-   struct dma_buf_attachment *import_attach = gem->import_attach;
void *src = cma_obj->vaddr;
-   int ret = 0;
+   int ret;
 
-   if (import_attach) {
-   ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
-  DMA_FROM_DEVICE);
-   if (ret)
-   return ret;
-   }
+   ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
+   if (ret)
+   return ret;
 
switch (fb->format->format) {
case DRM_FORMAT_RGB565:
if (swap)
-   drm_fb_swab(dst, src, fb, clip, !import_attach);
+   drm_fb_swab(dst, src, fb, clip, !gem->import_attach);
else
drm_fb_memcpy(dst, src, fb, clip);
break;
@@ -229,9 +224,8 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
return -EINVAL;
}
 
-   if (import_attach)
-   ret = dma_buf_end_cpu_access(import_attach->dmabuf,
-DMA_FROM_DEVICE);
+   drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
+
return ret;
 }
 EXPORT_SYMBOL(mipi_dbi_buf_copy);
-- 
2.32.0



[PATCH 7/7] drm/st7586: Use framebuffer dma-buf helpers

2021-07-16 Thread Thomas Zimmermann
Replace dma_buf_begin_cpu_access() with drm_gem_fb_begin_cpu_access();
same for _end_cpu_access(). Remove some boiler-plate code. No functional
changes.

Signed-off-by: Thomas Zimmermann 
---
 drivers/gpu/drm/tiny/st7586.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index 1be55bed609a..ad0faa8723c2 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -6,7 +6,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -21,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -92,24 +92,18 @@ static int st7586_buf_copy(void *dst, struct 
drm_framebuffer *fb,
   struct drm_rect *clip)
 {
struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-   struct dma_buf_attachment *import_attach = cma_obj->base.import_attach;
void *src = cma_obj->vaddr;
int ret = 0;
 
-   if (import_attach) {
-   ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
-  DMA_FROM_DEVICE);
-   if (ret)
-   return ret;
-   }
+   ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
+   if (ret)
+   return ret;
 
st7586_xrgb_to_gray332(dst, src, fb, clip);
 
-   if (import_attach)
-   ret = dma_buf_end_cpu_access(import_attach->dmabuf,
-DMA_FROM_DEVICE);
+   drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
-   return ret;
+   return 0;
 }
 
 static void st7586_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect)
-- 
2.32.0



  1   2   >