[PATCH 04/11] drm/xe: Move lockdep protection from mem_access to xe_pm_runtime

From: Rodrigo Vivi
Date: 2024-03-14
The mem_access itself is not holding any lock, but attempting
to train lockdep about the potentially scary locking that can
happen during runtime pm. We are soon going to kill the
mem_access get and put helpers in favor of direct xe_pm_runtime
calls, so let's just move this lockdep annotation to where it
now belongs.

v2: s/lockdep_training/lockdep_prime (Matt Auld)

Reviewed-by: Matthew Auld 
Signed-off-by: Rodrigo Vivi 
---
 drivers/gpu/drm/xe/xe_device.c | 23 -
 drivers/gpu/drm/xe/xe_device.h |  4 ---
 drivers/gpu/drm/xe/xe_pm.c | 45 --
 3 files changed, 37 insertions(+), 35 deletions(-)
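
For context, the "lockdep prime" the commit message refers to boils down
to the pattern sketched below. This is a minimal illustrative sketch, not
the literal driver code: the xe_pm_runtime_lockdep_prime() helper and the
simplified body of xe_pm_runtime_get() (including the pm_runtime_get_sync()
call) are assumptions made for brevity; only the map name and the
lock_map_acquire()/lock_map_release() pairing follow the patch.

#include <linux/lockdep.h>
#include <linux/pm_runtime.h>

#ifdef CONFIG_LOCKDEP
/*
 * Dummy map: it protects nothing and is compiled out of non-lockdep
 * builds. It exists only so lockdep records an ordering between locks
 * held by callers and whatever the runtime-pm callbacks acquire.
 */
static struct lockdep_map xe_pm_runtime_lockdep_map = {
	.name = "xe_pm_runtime_lockdep_map"
};
#endif

/*
 * Helper name is just for this sketch. Acquire and immediately release
 * the dummy map: if the caller currently holds lock(A), lockdep now
 * knows about the A -> xe_pm_runtime_lockdep_map ordering.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	lock_map_acquire(&xe_pm_runtime_lockdep_map);
	lock_map_release(&xe_pm_runtime_lockdep_map);
}

void xe_pm_runtime_get(struct xe_device *xe)
{
	/* Prime before waking the device, i.e. before the resume callback
	 * can run synchronously in this context.
	 */
	xe_pm_runtime_lockdep_prime();
	pm_runtime_get_sync(xe->drm.dev);
}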

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b0bfe75eb59f..82f686595b16 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -45,12 +45,6 @@
 #include "xe_vm.h"
 #include "xe_wait_user_fence.h"
 
-#ifdef CONFIG_LOCKDEP
-struct lockdep_map xe_device_mem_access_lockdep_map = {
-   .name = "xe_device_mem_access_lockdep_map"
-};
-#endif
-
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
struct xe_device *xe = to_xe_device(dev);
@@ -702,23 +696,6 @@ void xe_device_mem_access_get(struct xe_device *xe)
if (xe_pm_read_callback_task(xe) == current)
return;
 
-   /*
-* Since the resume here is synchronous it can be quite easy to deadlock
-* if we are not careful. Also in practice it might be quite timing
-* sensitive to ever see the 0 -> 1 transition with the callers locks
-* held, so deadlocks might exist but are hard for lockdep to ever see.
-* With this in mind, help lockdep learn about the potentially scary
-* stuff that can happen inside the runtime_resume callback by acquiring
-* a dummy lock (it doesn't protect anything and gets compiled out on
-* non-debug builds).  Lockdep then only needs to see the
-* mem_access_lockdep_map -> runtime_resume callback once, and then can
-* hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
-* For example if the (callers_locks) are ever grabbed in the
-* runtime_resume callback, lockdep should give us a nice splat.
-*/
-   lock_map_acquire(&xe_device_mem_access_lockdep_map);
-   lock_map_release(&xe_device_mem_access_lockdep_map);
-
xe_pm_runtime_get(xe);
ref = atomic_inc_return(&xe->mem_access.ref);
 
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 14be34d9f543..2653f53bee4e 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,10 +16,6 @@ struct xe_file;
 #include "xe_force_wake.h"
 #include "xe_macros.h"
 
-#ifdef CONFIG_LOCKDEP
-extern struct lockdep_map xe_device_mem_access_lockdep_map;
-#endif
-
 static inline struct xe_device *to_xe_device(const struct drm_device *dev)
 {
return container_of(dev, struct xe_device, drm);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 2e1362cf8deb..9d87a68ba6eb 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -68,6 +68,12 @@
  * management (RPS).
  */
 
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_pm_runtime_lockdep_map = {
+   .name = "xe_pm_runtime_lockdep_map"
+};
+#endif
+
 /**
  * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
  * @xe: xe device instance
@@ -297,11 +303,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_pm_write_callback_task(xe, current);
 
/*
-* The actual xe_device_mem_access_put() is always async underneath, so
+* The actual xe_pm_runtime_put() is always async underneath, so
* exactly where that is called should make no difference to us. However
 * we still need to be very careful with the locks that this callback
 * acquires and the locks that are acquired and held by any callers of
-* xe_device_mem_access_get(). We already have the matching annotation
+* xe_pm_runtime_get(). We already have the matching annotation
 * on that side, but we also need it here. For example lockdep should be
 * able to tell us if the following scenario is in theory possible:
 *
@@ -309,15 +315,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 * lock(A)   |
 *   | xe_pm_runtime_suspend()
 *   |  lock(A)
-* xe_device_mem_access_get()|
+* xe_pm_runtime_get()   |
 *
 * This will clearly deadlock since rpm core needs to wait for
 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
 * on CPU0 which prevents CPU1 making forward progress.  With the
-* annotation here and in xe_device_mem_access_get() lockdep will see
+* annotation here and in xe_pm_runtime_get() lockdep will see
 * the 
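
To make the matching annotation on the callback side concrete, the sketch
below shows roughly how it pairs up with the one in xe_pm_runtime_get().
Again, this is illustrative rather than the literal hunk: the placeholder
suspend work and the foo mutex in the usage note are invented for the
example; only xe_pm_write_callback_task() and the lockdep map usage mirror
the patch.

int xe_pm_runtime_suspend(struct xe_device *xe)
{
	int err = 0;

	/* Let the get/put helpers detect re-entry from this callback's task. */
	xe_pm_write_callback_task(xe, current);

	/*
	 * Hold the dummy map across the callback body: every lock taken
	 * below is recorded as nesting inside xe_pm_runtime_lockdep_map.
	 */
	lock_map_acquire(&xe_pm_runtime_lockdep_map);

	/* ... actual suspend work: evict buffers, save GT state, ... */

	lock_map_release(&xe_pm_runtime_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

With both halves in place, a caller that does

	mutex_lock(&foo);        /* some lock "A" also used in the callback */
	xe_pm_runtime_get(xe);   /* records A -> xe_pm_runtime_lockdep_map */

gives lockdep the full cycle (A -> map on the caller side, map -> A inside
the callback), so it can report the potential inversion even if the actual
0 -> 1 or 1 -> 0 transition never races with the caller's lock at runtime.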
