Re: [PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not unplugged

2022-01-06 Thread Andrey Grodzovsky

Got it

See below for one small comment; with that addressed, the patch is
Reviewed-by: Andrey Grodzovsky


On 2022-01-05 9:24 p.m., Shi, Leslie wrote:

[AMD Official Use Only]

Hi Andrey,

It is the following patch that makes the call to amdgpu_device_unmap_mmio()
conditional on the device being unplugged:

3efb17ae7e92 "drm/amdgpu: Call amdgpu_device_unmap_mmio() if device is unplugged to 
prevent crash in GPU initialization failure"

Regards,
Leslie

-----Original Message-----
From: Grodzovsky, Andrey 
Sent: Thursday, January 6, 2022 2:22 AM
To: Shi, Leslie ; Lazar, Lijo ; 
amd-gfx@lists.freedesktop.org
Cc: Chen, Guchun 
Subject: Re: [PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not 
unplugged


On 2022-01-04 11:23 p.m., Leslie Shi wrote:

Patch: 3efb17ae7e92 ("drm/amdgpu: Call amdgpu_device_unmap_mmio() if
device is unplugged to prevent crash in GPU initialization failure")
makes call to
amdgpu_device_unmap_mmio() conditioned on device unplugged. This patch
unmaps MMIO mappings even when device is not unplugged.

drm/amdgpu: Call amdgpu_device_unmap_mmio() if device is unplugged to prevent 
crash in GPU initialization failure

I don't see the 'call to amdgpu_device_unmap_mmio() conditioned on device 
unplugged'
part in this patch

Also, please add 'v2:bla bla bla' part in patch description telling what was 
done in v2

Andrey


Signed-off-by: Leslie Shi 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 +++
   3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 412f377f80b1..16dc16c860cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3832,6 +3832,7 @@ int amdgpu_device_init(struct amdgpu_device
*adev,
   
   static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)

   {
+



Drop the new line

Andrey



/* Clear all CPU mappings pointing to this device */
unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
   
@@ -3912,6 +3913,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device

*adev)
   
   void amdgpu_device_fini_sw(struct amdgpu_device *adev)

   {
+   int idx;
+
amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
@@ -3936,6 +3939,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, NULL, NULL, NULL);
   
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

+
+   iounmap(adev->rmmio);
+   adev->rmmio = NULL;
+   amdgpu_device_doorbell_fini(adev);
+   drm_dev_exit(idx);
+   }
+
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 156002db24e1..ff9dc377a3a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -33,6 +33,7 @@
   #include 
   #include 
   
+#include <drm/drm_drv.h>

   #include 
   #include 
   #include "amdgpu.h"
@@ -1061,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
*/
   void amdgpu_bo_fini(struct amdgpu_device *adev)
   {
+   int idx;
+
amdgpu_ttm_fini(adev);
+
+   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   arch_phys_wc_del(adev->gmc.vram_mtrr);
+   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   }
+   drm_dev_exit(idx);
+   }
   }
   
   /**

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 367abed1d6e6..ea897feeddd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -42,6 +42,7 @@
   #include 
   #include 
   
+#include <drm/drm_drv.h>

   #include 
   #include 
   #include 
@@ -1801,6 +1802,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*/
   void amdgpu_ttm_fini(struct amdgpu_device *adev)
   {
+   int idx;
if (!adev->mman.initialized)
return;
   
@@ -1815,6 +1817,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)

  NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
   
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

+
+   if (adev->mman.aper_base_kaddr)
+   iounmap(adev->mman.aper_base_kaddr);
+   adev->mman.aper_base_kaddr = NULL;
+
+   drm_dev_exit(idx);
+   }
+
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);


RE: [PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not unplugged

2022-01-05 Thread Shi, Leslie
[AMD Official Use Only]

Hi Andrey,

It is the following patch that makes the call to amdgpu_device_unmap_mmio()
conditional on the device being unplugged:

3efb17ae7e92 "drm/amdgpu: Call amdgpu_device_unmap_mmio() if device is 
unplugged to prevent crash in GPU initialization failure"

Regards,
Leslie

-----Original Message-----
From: Grodzovsky, Andrey  
Sent: Thursday, January 6, 2022 2:22 AM
To: Shi, Leslie ; Lazar, Lijo ; 
amd-gfx@lists.freedesktop.org
Cc: Chen, Guchun 
Subject: Re: [PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not 
unplugged


On 2022-01-04 11:23 p.m., Leslie Shi wrote:
> Patch: 3efb17ae7e92 ("drm/amdgpu: Call amdgpu_device_unmap_mmio() if 
> device is unplugged to prevent crash in GPU initialization failure") 
> makes call to
> amdgpu_device_unmap_mmio() conditioned on device unplugged. This patch 
> unmaps MMIO mappings even when device is not unplugged.


I don't see the 'call to amdgpu_device_unmap_mmio() conditioned on device 
unplugged'
part in this patch

Also, please add 'v2:bla bla bla' part in patch description telling what was 
done in v2

Andrey

>
> Signed-off-by: Leslie Shi 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 +++
>   3 files changed, 34 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 412f377f80b1..16dc16c860cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3832,6 +3832,7 @@ int amdgpu_device_init(struct amdgpu_device 
> *adev,
>   
>   static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
>   {
> +
>   /* Clear all CPU mappings pointing to this device */
>   unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
>   
> @@ -3912,6 +3913,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device 
> *adev)
>   
>   void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>   {
> + int idx;
> +
>   amdgpu_fence_driver_sw_fini(adev);
>   amdgpu_device_ip_fini(adev);
>   release_firmware(adev->firmware.gpu_info_fw);
> @@ -3936,6 +3939,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>   if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
>   vga_client_register(adev->pdev, NULL, NULL, NULL);
>   
> + if (drm_dev_enter(adev_to_drm(adev), &idx)) {
> +
> + iounmap(adev->rmmio);
> + adev->rmmio = NULL;
> + amdgpu_device_doorbell_fini(adev);
> + drm_dev_exit(idx);
> + }
> +
>   if (IS_ENABLED(CONFIG_PERF_EVENTS))
>   amdgpu_pmu_fini(adev);
>   if (adev->mman.discovery_bin)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 156002db24e1..ff9dc377a3a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -33,6 +33,7 @@
>   #include 
>   #include 
>   
> +#include <drm/drm_drv.h>
>   #include 
>   #include 
>   #include "amdgpu.h"
> @@ -1061,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
>*/
>   void amdgpu_bo_fini(struct amdgpu_device *adev)
>   {
> + int idx;
> +
>   amdgpu_ttm_fini(adev);
> +
> + if (drm_dev_enter(adev_to_drm(adev), &idx)) {
> +
> + if (!adev->gmc.xgmi.connected_to_cpu) {
> + arch_phys_wc_del(adev->gmc.vram_mtrr);
> + arch_io_free_memtype_wc(adev->gmc.aper_base, 
> adev->gmc.aper_size);
> + }
> + drm_dev_exit(idx);
> + }
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 367abed1d6e6..ea897feeddd2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -42,6 +42,7 @@
>   #include 
>   #include 
>   
> +#include <drm/drm_drv.h>
>   #include 
>   #include 
>   #include 
> @@ -1801,6 +1802,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>*/
>   void amdgpu_ttm_fini(struct amdgpu_device *adev)
>   {
> + int idx;
>   if (!adev->mman.initialized)
>   return;
>   
> @@ -1815,6 +1817,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
> NULL, NULL);
>   amdgpu_ttm_fw_reserve_vram_fini(adev);
>   
> + if (drm_dev_enter(adev_to_drm(adev), &idx)) {
> +
> + if (adev->mman.aper_base_kaddr)
> + iounmap(adev->mman.aper_base_kaddr);
> + adev->mman.aper_base_kaddr = NULL;
> +
> + drm_dev_exit(idx);
> + }
> +
>   amdgpu_vram_mgr_fini(adev);
>   amdgpu_gtt_mgr_fini(adev);
>   amdgpu_preempt_mgr_fini(adev);


Re: [PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not unplugged

2022-01-05 Thread Andrey Grodzovsky



On 2022-01-04 11:23 p.m., Leslie Shi wrote:

Patch: 3efb17ae7e92 ("drm/amdgpu: Call amdgpu_device_unmap_mmio() if device
is unplugged to prevent crash in GPU initialization failure") makes call to
amdgpu_device_unmap_mmio() conditioned on device unplugged. This patch unmaps
MMIO mappings even when device is not unplugged.



I don't see the 'call to amdgpu_device_unmap_mmio() conditioned on 
device unplugged'

part in this patch

Also, please add 'v2:bla bla bla' part in patch description telling what 
was done in v2


Andrey



Signed-off-by: Leslie Shi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 +++
  3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 412f377f80b1..16dc16c860cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3832,6 +3832,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  
  static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)

  {
+
/* Clear all CPU mappings pointing to this device */
unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
  
@@ -3912,6 +3913,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
  
  void amdgpu_device_fini_sw(struct amdgpu_device *adev)

  {
+   int idx;
+
amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
@@ -3936,6 +3939,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, NULL, NULL, NULL);
  
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

+
+   iounmap(adev->rmmio);
+   adev->rmmio = NULL;
+   amdgpu_device_doorbell_fini(adev);
+   drm_dev_exit(idx);
+   }
+
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 156002db24e1..ff9dc377a3a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -33,6 +33,7 @@
  #include 
  #include 
  
+#include <drm/drm_drv.h>

  #include 
  #include 
  #include "amdgpu.h"
@@ -1061,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
   */
  void amdgpu_bo_fini(struct amdgpu_device *adev)
  {
+   int idx;
+
amdgpu_ttm_fini(adev);
+
+   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   arch_phys_wc_del(adev->gmc.vram_mtrr);
+   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   }
+   drm_dev_exit(idx);
+   }
  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 367abed1d6e6..ea897feeddd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -42,6 +42,7 @@
  #include 
  #include 
  
+#include <drm/drm_drv.h>

  #include 
  #include 
  #include 
@@ -1801,6 +1802,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
   */
  void amdgpu_ttm_fini(struct amdgpu_device *adev)
  {
+   int idx;
if (!adev->mman.initialized)
return;
  
@@ -1815,6 +1817,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)

  NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
  
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

+
+   if (adev->mman.aper_base_kaddr)
+   iounmap(adev->mman.aper_base_kaddr);
+   adev->mman.aper_base_kaddr = NULL;
+
+   drm_dev_exit(idx);
+   }
+
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);


[PATCH v2] drm/amdgpu: Unmap MMIO mappings when device is not unplugged

2022-01-04 Thread Leslie Shi
Patch: 3efb17ae7e92 ("drm/amdgpu: Call amdgpu_device_unmap_mmio() if device
is unplugged to prevent crash in GPU initialization failure") makes call to
amdgpu_device_unmap_mmio() conditioned on device unplugged. This patch unmaps
MMIO mappings even when device is not unplugged.

Signed-off-by: Leslie Shi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 +++
 3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 412f377f80b1..16dc16c860cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3832,6 +3832,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
 {
+
/* Clear all CPU mappings pointing to this device */
unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
 
@@ -3912,6 +3913,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
+   int idx;
+
amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
@@ -3936,6 +3939,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, NULL, NULL, NULL);
 
+   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+   iounmap(adev->rmmio);
+   adev->rmmio = NULL;
+   amdgpu_device_doorbell_fini(adev);
+   drm_dev_exit(idx);
+   }
+
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 156002db24e1..ff9dc377a3a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 
+#include <drm/drm_drv.h>
 #include 
 #include 
 #include "amdgpu.h"
@@ -1061,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
  */
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
+   int idx;
+
amdgpu_ttm_fini(adev);
+
+   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+   if (!adev->gmc.xgmi.connected_to_cpu) {
+   arch_phys_wc_del(adev->gmc.vram_mtrr);
+   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   }
+   drm_dev_exit(idx);
+   }
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 367abed1d6e6..ea897feeddd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 
+#include <drm/drm_drv.h>
 #include 
 #include 
 #include 
@@ -1801,6 +1802,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  */
 void amdgpu_ttm_fini(struct amdgpu_device *adev)
 {
+   int idx;
if (!adev->mman.initialized)
return;
 
@@ -1815,6 +1817,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
  NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
 
+   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+   if (adev->mman.aper_base_kaddr)
+   iounmap(adev->mman.aper_base_kaddr);
+   adev->mman.aper_base_kaddr = NULL;
+
+   drm_dev_exit(idx);
+   }
+
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
-- 
2.25.1