Re: [RFC v4 09/11] drm/amdgpu: Move in_gpu_reset into reset_domain

2022-02-09 Thread Christian König

Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:

We should have a single instance per entire reset domain.

Signed-off-by: Andrey Grodzovsky 
Suggested-by: Lijo Lazar 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  7 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  4 ++--
  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  4 ++--
  6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ddfbcc8fd3d3..b89406b01694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1056,7 +1056,6 @@ struct amdgpu_device {
bool in_s4;
bool in_s0ix;
  
-	atomic_t 			in_gpu_reset;

enum pp_mp1_state   mp1_state;
struct amdgpu_doorbell_index doorbell_index;
  
@@ -1463,8 +1462,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)

 return adev->gmc.tmz_enabled;
  }
  
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)

-{
-   return atomic_read(&adev->in_gpu_reset);
-}
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index dcbb175d336f..e05d7cbefd2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3554,7 +3554,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
-   atomic_set(&adev->in_gpu_reset, 0);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
  
@@ -4829,7 +4828,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,

  static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
  {
-   atomic_set(&adev->in_gpu_reset, 1);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 1);
  
  	if (hive) {

down_write_nest_lock(&adev->reset_domain->sem, 
&hive->hive_lock);
@@ -4854,7 +4853,7 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
  {
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
  }
  
@@ -5699,6 +5698,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,

amdgpu_asic_invalidate_hdp(adev, ring);
  }
  
+int amdgpu_in_reset(struct amdgpu_device *adev)

+{
+   return atomic_read(&adev->reset_domain->in_gpu_reset);
+   }
+   
  /**
   * amdgpu_device_halt() - bring hardware to some kind of halt state
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index c0988c804459..5ab72c3bfbda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -131,6 +131,7 @@ struct amdgpu_reset_domain 
*amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
  
  	}
  
+	atomic_set(&reset_domain->in_gpu_reset, 0);

init_rwsem(&reset_domain->sem);
  
  	return reset_domain;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 80f918e87d4f..ea6fc98ea927 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -81,6 +81,7 @@ struct amdgpu_reset_domain {
struct workqueue_struct *wq;
enum amdgpu_reset_domain_type type;
struct rw_semaphore sem;
+   atomic_t in_gpu_reset;
  };
  
  
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

index 4e23c29e665c..b81acf59870c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -259,7 +259,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
+   if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
return;
  
  	down_write(&adev->reset_domain->sem);

@@ -277,7 +277,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
} while (timeout > 1);
  
  flr_done:

-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
  
  	/* Trigger recovery for world switch failure if no TDR */

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index f715780f7d20..22c10b97ea81 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ 

[RFC v4 09/11] drm/amdgpu: Move in_gpu_reset into reset_domain

2022-02-08 Thread Andrey Grodzovsky
We should have a single instance per entire reset domain.

Signed-off-by: Andrey Grodzovsky 
Suggested-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  4 ++--
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ddfbcc8fd3d3..b89406b01694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1056,7 +1056,6 @@ struct amdgpu_device {
bool in_s4;
bool in_s0ix;
 
-   atomic_t in_gpu_reset;
enum pp_mp1_state   mp1_state;
struct amdgpu_doorbell_index doorbell_index;
 
@@ -1463,8 +1462,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device 
*adev)
return adev->gmc.tmz_enabled;
 }
 
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)
-{
-   return atomic_read(&adev->in_gpu_reset);
-}
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index dcbb175d336f..e05d7cbefd2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3554,7 +3554,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
-   atomic_set(&adev->in_gpu_reset, 0);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
 
@@ -4829,7 +4828,7 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
 static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
 {
-   atomic_set(&adev->in_gpu_reset, 1);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 1);
 
if (hive) {
down_write_nest_lock(&adev->reset_domain->sem, 
&hive->hive_lock);
@@ -4854,7 +4853,7 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
 {
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
 }
 
@@ -5699,6 +5698,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device 
*adev,
amdgpu_asic_invalidate_hdp(adev, ring);
 }
 
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+   return atomic_read(&adev->reset_domain->in_gpu_reset);
+   }
+   
 /**
  * amdgpu_device_halt() - bring hardware to some kind of halt state
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index c0988c804459..5ab72c3bfbda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -131,6 +131,7 @@ struct amdgpu_reset_domain 
*amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
}
 
+   atomic_set(&reset_domain->in_gpu_reset, 0);
init_rwsem(&reset_domain->sem);
 
return reset_domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 80f918e87d4f..ea6fc98ea927 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -81,6 +81,7 @@ struct amdgpu_reset_domain {
struct workqueue_struct *wq;
enum amdgpu_reset_domain_type type;
struct rw_semaphore sem;
+   atomic_t in_gpu_reset;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 4e23c29e665c..b81acf59870c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -259,7 +259,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
+   if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
return;
 
down_write(&adev->reset_domain->sem);
@@ -277,7 +277,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
} while (timeout > 1);
 
 flr_done:
-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
 
/* Trigger recovery for world switch failure if no TDR */
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index f715780f7d20..22c10b97ea81 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -283,7 +283,7 @@ static void xgpu_nv_mailbox_flr_work(struct