[PATCH 1/1] drm/amdgpu: drop the UCODE_MAX_PSP_PACKAGING checking

2024-09-23 Thread Le Ma
This check is no longer suitable after the introduction of the
psp aux firmware binary.

Signed-off-by: Le Ma 
Suggested-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  2 --
 2 files changed, 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7452b2dd775b..7b8469da3e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3484,12 +3484,6 @@ int psp_init_sos_microcode(struct psp_context *psp, 
const char *chip_name)
 
fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
 
-   if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
-   dev_err(adev->dev, "packed SOS count exceeds maximum 
limit\n");
-   err = -EINVAL;
-   goto out;
-   }
-
if (sos_hdr_v2_0->header.header_version_minor == 1) {
sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 
*)adev->psp.sos_fw->data;
 
@@ -3653,11 +3647,6 @@ static int parse_ta_v2_microcode(struct psp_context *psp)
if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
return -EINVAL;
 
-   if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
-   dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
-   return -EINVAL;
-   }
-
for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); 
ta_index++) {
err = parse_ta_bin_descriptor(psp,
  &ta_hdr->ta_fw_bin[ta_index],
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 4e23419b92d4..a06cc0a155fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -456,8 +456,6 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
 };
 
-#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - 
sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 
2)
-
 /*
  * fw loading support
  */
-- 
2.43.2



[PATCH 2/2] drm/amdgpu: load sos binary properly on the basis of pmfw version

2024-09-12 Thread Le Ma
To be compatible with legacy IFWI, driver needs to carry legacy tOS and
query pmfw version to load them accordingly.

Add psp_firmware_header_v2_1 to handle the combined sos binary.

Double the sos count limit for the case of aux sos fw packed.

v2: pass the correct fw_bin_desc to parse_sos_bin_descriptor

Signed-off-by: Le Ma 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 29 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 11 -
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 189574d53ebd..f702f3391c2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, 
const char *chip_name)
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
-   int err = 0;
+   const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+   int fw_index, fw_bin_count, start_index = 0;
+   const struct psp_fw_bin_desc *fw_bin;
uint8_t *ucode_array_start_addr;
-   int fw_index = 0;
+   int err = 0;
 
err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, 
"amdgpu/%s_sos.bin", chip_name);
if (err)
@@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, 
const char *chip_name)
case 2:
sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 
*)adev->psp.sos_fw->data;
 
-   if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= 
UCODE_MAX_PSP_PACKAGING) {
+   fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+   if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed SOS count exceeds maximum 
limit\n");
err = -EINVAL;
goto out;
}
 
-   for (fw_index = 0; fw_index < 
le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
-   err = parse_sos_bin_descriptor(psp,
-  
&sos_hdr_v2_0->psp_fw_bin[fw_index],
+   if (sos_hdr_v2_0->header.header_version_minor == 1) {
+   sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 
*)adev->psp.sos_fw->data;
+
+   fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+   if (psp_is_aux_sos_load_required(psp))
+   start_index = 
le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+   else
+   fw_bin_count -= 
le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+   } else {
+   fw_bin = sos_hdr_v2_0->psp_fw_bin;
+   }
+
+   for (fw_index = start_index; fw_index < fw_bin_count; 
fw_index++) {
+   err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
   sos_hdr_v2_0);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 5bc37acd3981..36b14c1b94b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 {
struct psp_fw_bin_desc psp_fw_bin[];
 };
 
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+   struct common_firmware_header header;
+   uint32_t psp_fw_bin_count;
+   uint32_t psp_aux_fw_bin_index;
+   struct psp_fw_bin_desc psp_fw_bin[];
+};
+
 /* version_major=1, version_minor=0 */
 struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -426,6 +434,7 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct psp_firmware_header_v2_0 psp_v2_0;
+   struct psp_firmware_header_v2_0 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
@@ -447,7 +456,7 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
 };
 
-#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - 
sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
+#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - 
sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2
 
 /*
  * fw loading support
-- 
2.43.2



[PATCH 1/2] drm/amdgpu: add psp funcs callback to check if aux fw is needed

2024-09-12 Thread Le Ma
Query pmfw version to determine if aux sos fw needs to be loaded in psp v13.0.

v2: refine callback to check if aux_fw loading is needed instead of
merely getting the pmfw version
v3: return the comparison directly

Signed-off-by: Le Ma 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  4 
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 17 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 74a96516c913..e8abbbcb4326 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -138,6 +138,7 @@ struct psp_funcs {
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
bool (*get_ras_capability)(struct psp_context *psp);
+   bool (*is_aux_sos_load_required)(struct psp_context *psp);
 };
 
 struct ta_funcs {
@@ -464,6 +465,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->fatal_error_recovery_quirk ? \
(psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
 
+#define psp_is_aux_sos_load_required(psp) \
+   ((psp)->funcs->is_aux_sos_load_required ? 
(psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
 extern const struct amd_ip_funcs psp_ip_funcs;
 
 extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 1251ee38a676..51e470e8d67d 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
 /* memory training timeout define */
 #define MEM_TRAIN_SEND_MSG_TIMEOUT_US  300
 
+#define regMP1_PUB_SCRATCH00x3b10090
+
 static int psp_v13_0_init_microcode(struct psp_context *psp)
 {
struct amdgpu_device *adev = psp->adev;
@@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct 
psp_context *psp)
}
 }
 
+static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
+{
+   struct amdgpu_device *adev = psp->adev;
+   u32 pmfw_ver;
+
+   if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+   return false;
+
+   /* load 4e version of sos if pmfw version less than 85.115.0 */
+   pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4);
+
+   return (pmfw_ver < 0x557300);
+}
+
 static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
.get_ras_capability = psp_v13_0_get_ras_capability,
+   .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
 };
 
 void psp_v13_0_set_psp_funcs(struct psp_context *psp)
-- 
2.43.2



[PATCH] drm/amdgpu: init microcode chip name from ip versions

2024-04-21 Thread Le Ma
To adapt to different gc versions in gfx_v9_4_3.c file.

Change-Id: Ib4465aade0dcbbcc43318c6dc865f813c5411097
Signed-off-by: Le Ma 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 835004187a58..ec4d3fa87e4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -431,16 +431,16 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct 
amdgpu_device *adev,
 
 static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
+   char ucode_prefix[30];
int r;
 
-   chip_name = "gc_9_4_3";
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
+   r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
 
-   r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
+   r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
 
-- 
2.43.2



[PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Le Ma
To fix the entity rq NULL issue. This setting has been moved to upper level.

Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..589a734982a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct 
amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 doorbell_offset, doorbell;
u32 rb_cntl, ib_cntl;
-   int i, unset = 0;
+   int i;
 
for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].ring;
 
-   if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = 1;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 
0);
WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
@@ -490,17 +485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct 
amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
-   bool unset = false;
 
for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].page;
 
-   if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
-   (!unset)) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = true;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
*adev,
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
-   if (adev->mman.buffer_funcs_ring == page)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
-   if (adev->mman.buffer_funcs_ring == ring)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
 
return r;
-- 
2.43.2



[PATCH v2] drm/amdgpu: move the drm client creation behind drm device registration

2024-01-25 Thread Le Ma
This patch is to eliminate interrupt warning below:

  "[drm] Fence fallback timer expired on ring sdma0.0".

An early vm pt clearing job is sent to SDMA before interrupts are enabled;
this was introduced by the patch below:

  - drm/amdkfd: Export DMABufs from KFD using GEM handles

Relocating the drm client creation to after drm_dev_register also
looks like a more proper flow.

v2: wrap the drm client creation

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 +++
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 77e263660288..41db030ddc4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct 
*work)
 static const struct drm_client_funcs kfd_client_funcs = {
.unregister = drm_client_release,
 };
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+   int ret;
+
+   if (!adev->kfd.init_complete)
+   return 0;
+
+   ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+   if (ret) {
+   dev_err(adev->dev, "Failed to init DRM client: %d\n",
+   ret);
+   return ret;
+   }
+
+   drm_client_register(&adev->kfd.client);
+
+   return 0;
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
int i;
int last_valid_bit;
-   int ret;
 
amdgpu_amdkfd_gpuvm_init_mem_limits();
 
@@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};
 
-   ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", 
&kfd_client_funcs);
-   if (ret) {
-   dev_err(adev->dev, "Failed to init DRM client: %d\n", 
ret);
-   return;
-   }
-
/* this is going to have a few of the MSBs set that we need to
 * clear
 */
@@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
&gpu_resources);
-   if (adev->kfd.init_complete)
-   drm_client_register(&adev->kfd.client);
-   else
-   drm_client_release(&adev->kfd.client);
 
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 584a0cea5572..da175c384adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct 
amdgpu_device *adev,
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
 #if defined(CONFIG_DEBUG_FS)
 int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 475bd59c9ac2..91d5d9435067 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
if (ret)
goto err_pci;
 
+   ret = amdgpu_amdkfd_drm_client_create(adev);
+   if (ret)
+   goto err_pci;
+
/*
 * 1. don't init fbdev on hw without DCE
 * 2. don't init fbdev if there are no connectors
-- 
2.38.1



[PATCH 1/1] drm/amdgpu: move the drm client creation behind drm device registration

2024-01-24 Thread Le Ma
This patch is to eliminate interrupt warning below:

  "[drm] Fence fallback timer expired on ring sdma0.0".

An early vm pt clearing job is sent to SDMA before interrupts are enabled;
this was introduced by the patch below:

  - drm/amdkfd: Export DMABufs from KFD using GEM handles

Relocating the drm client creation to after drm_dev_register also
looks like a more proper flow.

Change-Id: I0fece177b78345187068f92a823d96b3b7581140
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 11 +++
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index add315644773..69eb0f5574d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -139,14 +139,13 @@ static void amdgpu_amdkfd_reset_work(struct work_struct 
*work)
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
 
-static const struct drm_client_funcs kfd_client_funcs = {
+const struct drm_client_funcs kfd_client_funcs = {
.unregister = drm_client_release,
 };
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
int i;
int last_valid_bit;
-   int ret;
 
amdgpu_amdkfd_gpuvm_init_mem_limits();
 
@@ -165,12 +164,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};
 
-   ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", 
&kfd_client_funcs);
-   if (ret) {
-   dev_err(adev->dev, "Failed to init DRM client: %d\n", 
ret);
-   return;
-   }
-
/* this is going to have a few of the MSBs set that we need to
 * clear
 */
@@ -209,10 +202,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
&gpu_resources);
-   if (adev->kfd.init_complete)
-   drm_client_register(&adev->kfd.client);
-   else
-   drm_client_release(&adev->kfd.client);
 
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 00eed8c10cd4..b2c6f2b3c0fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -40,6 +40,8 @@
 
 extern uint64_t amdgpu_amdkfd_total_mem_size;
 
+extern const struct drm_client_funcs kfd_client_funcs;
+
 enum TLB_FLUSH_TYPE {
TLB_FLUSH_LEGACY = 0,
TLB_FLUSH_LIGHTWEIGHT,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0d0aa4b798ac..d0b98343481d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2293,6 +2293,17 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
drm_fbdev_generic_setup(adev_to_drm(adev), 32);
}
 
+   if (adev->kfd.init_complete) {
+   ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+   if (ret) {
+   dev_err(adev->dev, "Failed to init DRM client: %d\n",
+   ret);
+   goto err_pci;
+   }
+   drm_client_register(&adev->kfd.client);
+   }
+
ret = amdgpu_debugfs_init(adev);
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
-- 
2.38.1



[PATCH 1/1] drm/amdgpu: enable interrupt prior to kfd device_init

2024-01-24 Thread Le Ma
This patch is to eliminate interrupt warning below:

  "[drm] Fence fallback timer expired on ring sdma0.0".

An early vm pt clearing job is sent to SDMA before interrupts are enabled;
this was introduced by the patch below:

  - drm/amdkfd: Export DMABufs from KFD using GEM handles

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 56d9dfa61290..c8aa07282366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2833,12 +2833,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (r)
goto init_failed;
 
-   /* Don't init kfd if whole hive need to be reset during init */
-   if (!adev->gmc.xgmi.pending_reset) {
-   kgd2kfd_init_zone_device(adev);
-   amdgpu_amdkfd_device_init(adev);
-   }
-
amdgpu_fru_get_product_info(adev);
 
 init_failed:
@@ -4204,6 +4198,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
amdgpu_fence_driver_hw_init(adev);
 
+   /* Don't init kfd if whole hive need to be reset during init */
+   if (!adev->gmc.xgmi.pending_reset) {
+   kgd2kfd_init_zone_device(adev);
+   amdgpu_amdkfd_device_init(adev);
+   }
+
dev_info(adev->dev,
"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
adev->gfx.config.max_shader_engines,
-- 
2.38.1



[PATCH 2/3 v2] drm/amdgpu: add debug flag to place fw bo on vram for frontdoor loading

2024-01-09 Thread Le Ma
Use debug_mask=0x8 param to help isolate data path issues
on new systems in the early phase.

v2: rename the flag for explicitness (lijo)

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616b6c911767..3d8a48f46b01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1144,6 +1144,7 @@ struct amdgpu_device {
booldebug_vm;
booldebug_largebar;
booldebug_disable_soft_recovery;
+   booldebug_use_vram_fw_buf;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 880137774b4e..0776b0c5e4e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1),
AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+   AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct 
amdgpu_device *adev)
pr_info("debug: soft reset for GPU recovery disabled\n");
adev->debug_disable_soft_recovery = true;
}
+
+   if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+   pr_info("debug: place fw in vram for frontdoor loading\n");
+   adev->debug_use_vram_fw_buf = true;
+   }
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long 
flags)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 51bfe3757c89..215994409ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -467,7 +467,7 @@ static int psp_sw_init(void *handle)
}
 
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- amdgpu_sriov_vf(adev) ?
+ (amdgpu_sriov_vf(adev) || 
adev->debug_use_vram_fw_buf) ?
  AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
  &psp->fw_pri_bo,
  &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 0efb2568cb65..3e12763e477a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-   amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
+   (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+   AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
-- 
2.38.1



[PATCH 3/3] drm/amdgpu: move debug options init prior to amdgpu device init

2024-01-09 Thread Le Ma
To bring debug options into effect in early initialization phase

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3e0e39a1b5ba..b67ffc3a9a3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2234,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
pci_set_drvdata(pdev, ddev);
 
+   amdgpu_init_debug_options(adev);
+
ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
@@ -2314,8 +2316,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
amdgpu_get_secondary_funcs(adev);
}
 
-   amdgpu_init_debug_options(adev);
-
return 0;
 
 err_pci:
-- 
2.38.1



[PATCH 2/3] drm/amdgpu: add debug flag to change fw bo placement for frontdoor loading

2024-01-09 Thread Le Ma
Use debug_mask=0x8 param to help isolate data path issues
on new systems in the early phase.

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616b6c911767..c740825cf5dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1144,6 +1144,7 @@ struct amdgpu_device {
booldebug_vm;
booldebug_largebar;
booldebug_disable_soft_recovery;
+   booldebug_change_fw_placement;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 880137774b4e..3e0e39a1b5ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1),
AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+   AMDGPU_DEBUG_CHANGE_FW_PLACEMENT = BIT(3),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct 
amdgpu_device *adev)
pr_info("debug: soft reset for GPU recovery disabled\n");
adev->debug_disable_soft_recovery = true;
}
+
+   if (amdgpu_debug_mask & AMDGPU_DEBUG_CHANGE_FW_PLACEMENT) {
+   pr_info("debug: place fw in vram for frontdoor loading\n");
+   adev->debug_change_fw_placement = true;
+   }
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long 
flags)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 51bfe3757c89..a9f2d33fa54b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -467,7 +467,7 @@ static int psp_sw_init(void *handle)
}
 
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- amdgpu_sriov_vf(adev) ?
+ (amdgpu_sriov_vf(adev) || 
adev->debug_change_fw_placement) ?
  AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
  &psp->fw_pri_bo,
  &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 0efb2568cb65..8d36a7c1c789 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-   amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
+   (amdgpu_sriov_vf(adev) || 
adev->debug_change_fw_placement) ?
+   AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
-- 
2.38.1



[PATCH 1/3] Revert "drm/amdgpu: add param to specify fw bo location for front-door loading"

2024-01-09 Thread Le Ma
This reverts commit 1797d97a977280ac2cec95b1b59acc5856fea11e.

Will use debug module param instead of independent module param.

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 2 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +--
 4 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9da14436a373..616b6c911767 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,8 +254,6 @@ extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
 
-extern int fw_bo_location;
-
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 852cec98ff26..880137774b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -210,7 +210,6 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
-int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -990,10 +989,6 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 
= auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
-MODULE_PARM_DESC(fw_bo_location,
-   "location to put firmware bo for frontdoor loading (-1 = auto 
(default), 0 = on ram, 1 = on vram");
-module_param(fw_bo_location, int, 0644);
-
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0993ba4a4476..51bfe3757c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -467,7 +467,7 @@ static int psp_sw_init(void *handle)
}
 
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- (amdgpu_sriov_vf(adev) || fw_bo_location 
== 1) ?
+ amdgpu_sriov_vf(adev) ?
  AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
  &psp->fw_pri_bo,
  &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index d334e42fe0eb..0efb2568cb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,8 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-   (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
-   AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+   amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
-- 
2.38.1



[PATCH 1/1] drm/amdgpu: add param to specify fw bo location for front-door loading

2024-01-03 Thread Le Ma
This param can help isolate data path issues on new systems in the early phase.

Change-Id: I0a972dd74fe2aad6b56628cea32ad72dcd17e283
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616b6c911767..9da14436a373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,6 +254,8 @@ extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
 
+extern int fw_bo_location;
+
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 880137774b4e..852cec98ff26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -210,6 +210,7 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
+int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -989,6 +990,10 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 
= auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
+MODULE_PARM_DESC(fw_bo_location,
+   "location to put firmware bo for frontdoor loading (-1 = auto 
(default), 0 = on ram, 1 = on vram");
+module_param(fw_bo_location, int, 0644);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1bf975b8d083..2addbdf88394 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
}
 
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- amdgpu_sriov_vf(adev) ?
+ (amdgpu_sriov_vf(adev) || fw_bo_location 
== 1) ?
  AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
  &psp->fw_pri_bo,
  &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index b14127429f30..1f67914568f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-   amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : 
AMDGPU_GEM_DOMAIN_GTT,
+   (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+   AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
-- 
2.38.1



[PATCH 1/1] drm/amdgpu: finalizing mem_partitions at the end of GMC v9 sw_fini

2023-11-13 Thread Le Ma
A valid num_mem_partitions is still required during ttm pool fini, so move the
cleanup to the end of the function.

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index bde25eb4ed8e..c1f2f166f064 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle)
 
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
amdgpu_gmc_sysfs_fini(adev);
-   adev->gmc.num_mem_partitions = 0;
-   kfree(adev->gmc.mem_partitions);
 
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
@@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
 
+   adev->gmc.num_mem_partitions = 0;
+   kfree(adev->gmc.mem_partitions);
+
return 0;
 }
 
-- 
2.38.1



[PATCH 1/1] drm/amd/pm: raise the deep sleep clock threshold for smu 13.0.6

2023-11-07 Thread Le Ma
The DS clock may exceed the limit as sclk dfll divider is 16 to target freq.

Signed-off-by: Le Ma 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 20f66e696f87..83e1228e6eee 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -94,7 +94,7 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin");
 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
 #define LINK_SPEED_MAX 4
 
-#define SMU_13_0_6_DSCLK_THRESHOLD 100
+#define SMU_13_0_6_DSCLK_THRESHOLD 140
 
 #define MCA_BANK_IPID(_ip, _hwid, _type) \
[AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, }
-- 
2.38.1



[PATCH 1/1] drm/amd/pm: fix the print_clk_levels issue for SMU v13.0.6

2023-09-26 Thread Le Ma
Pass the caller's current size to smu_v13_0_6_print_clks; otherwise it starts
again from a local size of 0 and the same place in buf is overwritten.

Change-Id: Ia0e12430d01146a11490204c1bab4b4f06cd17ea
Signed-off-by: Le Ma 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 24 +--
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 11a6cd96c601..19c117eb5ebe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -821,12 +821,12 @@ static int 
smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu,
return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value);
 }
 
-static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf,
+static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
  struct smu_13_0_dpm_table *single_dpm_table,
  uint32_t curr_clk, const char *clk_name)
 {
struct pp_clock_levels_with_latency clocks;
-   int i, ret, size = 0, level = -1;
+   int i, ret, level = -1;
uint32_t clk1, clk2;
 
ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
@@ -947,8 +947,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context 
*smu,
 
single_dpm_table = &(dpm_context->dpm_tables.uclk_table);
 
-   return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
- "mclk");
+   return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
+ now, "mclk");
 
case SMU_SOCCLK:
ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK,
@@ -961,8 +961,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context 
*smu,
 
single_dpm_table = &(dpm_context->dpm_tables.soc_table);
 
-   return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
- "socclk");
+   return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
+ now, "socclk");
 
case SMU_FCLK:
ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK,
@@ -975,8 +975,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context 
*smu,
 
single_dpm_table = &(dpm_context->dpm_tables.fclk_table);
 
-   return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
- "fclk");
+   return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
+ now, "fclk");
 
case SMU_VCLK:
ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK,
@@ -989,8 +989,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context 
*smu,
 
single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
 
-   return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
- "vclk");
+   return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
+ now, "vclk");
 
case SMU_DCLK:
ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK,
@@ -1003,8 +1003,8 @@ static int smu_v13_0_6_print_clk_levels(struct 
smu_context *smu,
 
single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
 
-   return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
- "dclk");
+   return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table,
+ now, "dclk");
 
default:
break;
-- 
2.38.1



[PATCH 7/7] drm/amd/pm: deprecate allow_xgmi_power_down interface

2023-09-24 Thread Le Ma
Replace allow_xgmi_power_down with set_plpd_mode uniformly at all call sites.

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  |  4 ++--
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 14 --
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  2 --
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 -
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  8 
 5 files changed, 2 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 061534e845a7..1fb3f1ecfa7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1072,7 +1072,7 @@ static int amdgpu_ras_error_inject_xgmi(struct 
amdgpu_device *adev,
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
 
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW))
dev_warn(adev->dev, "Failed to disallow XGMI power down");
 
ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
@@ -1080,7 +1080,7 @@ static int amdgpu_ras_error_inject_xgmi(struct 
amdgpu_device *adev,
if (amdgpu_ras_intr_triggered())
return ret;
 
-   if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT))
dev_warn(adev->dev, "Failed to allow XGMI power down");
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 9a157fe4cbc7..1b17a71ed45e 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -351,20 +351,6 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
return ret;
 }
 
-int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
-{
-   struct smu_context *smu = adev->powerplay.pp_handle;
-   int ret = 0;
-
-   if (is_support_sw_smu(adev)) {
-   mutex_lock(&adev->pm.mutex);
-   ret = smu_allow_xgmi_power_down(smu, en);
-   mutex_unlock(&adev->pm.mutex);
-   }
-
-   return ret;
-}
-
 int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev, char **mode_desc)
 {
struct smu_context *smu = adev->powerplay.pp_handle;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 564494f29717..feccd2a7120d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -424,8 +424,6 @@ int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
 int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
 uint32_t cstate);
 
-int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en);
-
 int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev,
  char **mode);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 1c6b22638bf4..33eaf0d77163 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2178,23 +2178,6 @@ static int smu_set_df_cstate(void *handle,
return ret;
 }
 
-int smu_allow_xgmi_power_down(struct smu_context *smu, bool en)
-{
-   int ret = 0;
-
-   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
-   return -EOPNOTSUPP;
-
-   if (!smu->ppt_funcs || !smu->ppt_funcs->allow_xgmi_power_down)
-   return 0;
-
-   ret = smu->ppt_funcs->allow_xgmi_power_down(smu, en);
-   if (ret)
-   dev_err(smu->adev->dev, "[AllowXgmiPowerDown] failed!\n");
-
-   return ret;
-}
-
 int smu_write_watermarks_table(struct smu_context *smu)
 {
if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index e17169f681e8..4f6df3558b9b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -834,12 +834,6 @@ struct pptable_funcs {
 */
int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state);
 
-   /**
-* @allow_xgmi_power_down: Enable/disable external global memory
-* interconnect power down.
-*/
-   int (*allow_xgmi_power_down)(struct smu_context *smu, bool en);
-
/**
 * @select_xgmi_plpd_policy: Select xgmi per-link power down policy.
 */
@@ -1491,8 +1485,6 @@ int smu_set_gfx_power_up_by_imu(struct smu_context *smu);
 
 int smu_set_ac_dc(struct smu_context *smu);
 
-int smu_allow_xgmi_power_down(struct smu_context *smu, bool

[PATCH 6/7] drm/amd/pm: integrate plpd allow/disallow into select_xgmi_plpd_policy in ppt level

2023-09-24 Thread Le Ma
Calls to allow_xgmi_power_down(true/false) will generally be replaced by:
  - allow: select_xgmi_plpd_policy(XGMI_PLPD_DEFAULT)
  - disallow: select_xgmi_plpd_policy(XGMI_PLPD_DISALLOW)

Signed-off-by: Le Ma 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 21 +
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 23 ---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 22 ++
 3 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 080140a0f673..6e2e665ad383 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2227,7 +2227,8 @@ static int arcturus_set_df_cstate(struct smu_context *smu,
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, 
state, NULL);
 }
 
-static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en)
+static int arcturus_select_xgmi_plpd_policy(struct smu_context *smu,
+   enum pp_xgmi_plpd_mode mode)
 {
uint32_t smu_version;
int ret;
@@ -2244,16 +2245,16 @@ static int arcturus_allow_xgmi_power_down(struct 
smu_context *smu, bool en)
return -EINVAL;
}
 
-   if (en)
+   if (mode == XGMI_PLPD_DEFAULT)
return smu_cmn_send_smc_msg_with_param(smu,
   SMU_MSG_GmiPwrDnControl,
-  1,
-  NULL);
-
-   return smu_cmn_send_smc_msg_with_param(smu,
-  SMU_MSG_GmiPwrDnControl,
-  0,
-  NULL);
+  1, NULL);
+   else if (mode == XGMI_PLPD_DISALLOW)
+   return smu_cmn_send_smc_msg_with_param(smu,
+  SMU_MSG_GmiPwrDnControl,
+  0, NULL);
+   else
+   return -EINVAL;
 }
 
 static const struct throttling_logging_label {
@@ -2455,7 +2456,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
.set_df_cstate = arcturus_set_df_cstate,
-   .allow_xgmi_power_down = arcturus_allow_xgmi_power_down,
+   .select_xgmi_plpd_policy = arcturus_select_xgmi_plpd_policy,
.log_thermal_throttling_event = arcturus_log_thermal_throttling_event,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 23820204efd7..b57184a3e24f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1604,20 +1604,27 @@ static int aldebaran_set_df_cstate(struct smu_context 
*smu,
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, 
state, NULL);
 }
 
-static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en)
+static int aldebaran_select_xgmi_plpd_policy(struct smu_context *smu,
+enum pp_xgmi_plpd_mode mode)
 {
struct amdgpu_device *adev = smu->adev;
 
/* The message only works on master die and NACK will be sent
   back for other dies, only send it on master die */
-   if (!adev->smuio.funcs->get_socket_id(adev) &&
-   !adev->smuio.funcs->get_die_id(adev))
+   if (adev->smuio.funcs->get_socket_id(adev) ||
+   adev->smuio.funcs->get_die_id(adev))
+   return 0;
+
+   if (mode == XGMI_PLPD_DEFAULT)
+   return smu_cmn_send_smc_msg_with_param(smu,
+  SMU_MSG_GmiPwrDnControl,
+  0, NULL);
+   else if (mode == XGMI_PLPD_DISALLOW)
return smu_cmn_send_smc_msg_with_param(smu,
-  SMU_MSG_GmiPwrDnControl,
-  en ? 0 : 1,
-  NULL);
+  SMU_MSG_GmiPwrDnControl,
+  1, NULL);
else
-   return 0;
+   return -EINVAL;
 }
 
 static const struct throttling_logging_label {
@@ -2072,7 +2079,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
.set_soft_freq_limited_range = aldebaran_set_soft_freq_limited_range,
.od_edit_dpm_table = aldebaran_usr_edit_dpm_table,
.set_

[PATCH 4/7] drm/amd/pm: add xgmi_plpd_policy sysfs node for user to change plpd policy

2023-09-24 Thread Le Ma
Add xgmi_plpd_policy sysfs node for users to check and select xgmi
per-link power down policy:
  - arg 0: disallow plpd
  - arg 1: default policy
  - arg 2: optimized policy

v2: split from smu v13.0.6 code and miscellaneous updates
v3: add usage comments around set/get functions

Signed-off-by: Le Ma 
Reviewed-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 43 
 drivers/gpu/drm/amd/pm/amdgpu_pm.c  | 68 +
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h |  5 ++
 3 files changed, 116 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 07853162..9a157fe4cbc7 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -365,6 +365,49 @@ int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device 
*adev, bool en)
return ret;
 }
 
+int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev, char **mode_desc)
+{
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   int mode = XGMI_PLPD_NONE;
+
+   if (is_support_sw_smu(adev)) {
+   mode = smu->plpd_mode;
+   if (mode_desc == NULL)
+   return mode;
+   switch (smu->plpd_mode) {
+   case XGMI_PLPD_DISALLOW:
+   *mode_desc = "disallow";
+   break;
+   case XGMI_PLPD_DEFAULT:
+   *mode_desc = "default";
+   break;
+   case XGMI_PLPD_OPTIMIZED:
+   *mode_desc = "optimized";
+   break;
+   case XGMI_PLPD_NONE:
+   default:
+   *mode_desc = "none";
+   break;
+   }
+   }
+
+   return mode;
+}
+
+int amdgpu_dpm_set_xgmi_plpd_mode(struct amdgpu_device *adev, int mode)
+{
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   int ret = -EOPNOTSUPP;
+
+   if (is_support_sw_smu(adev)) {
+   mutex_lock(&adev->pm.mutex);
+   ret = smu_set_xgmi_plpd_mode(smu, mode);
+   mutex_unlock(&adev->pm.mutex);
+   }
+
+   return ret;
+}
+
 int amdgpu_dpm_enable_mgpu_fan_boost(struct amdgpu_device *adev)
 {
void *pp_handle = adev->powerplay.pp_handle;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2807f3bd9ebe..745f5b6a2826 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1991,6 +1991,70 @@ static int ss_bias_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
return 0;
 }
 
+/* Following items will be read out to indicate current plpd policy:
+ *  - -1: none
+ *  - 0: disallow
+ *  - 1: default
+ *  - 2: optimized
+ */
+static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
+  struct device_attribute *attr,
+  char *buf)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   char *mode_desc = "none";
+   int mode;
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
+
+   return sysfs_emit(buf, "%d: %s\n", mode, mode_desc);
+}
+
+/* Following argument value is expected from user to change plpd policy
+ *  - arg 0: disallow plpd
+ *  - arg 1: default policy
+ *  - arg 2: optimized policy
+ */
+static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
+  struct device_attribute *attr,
+  const char *buf, size_t count)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   int mode, ret;
+
+   if (amdgpu_in_reset(adev))
+   return -EPERM;
+   if (adev->in_suspend && !adev->in_runpm)
+   return -EPERM;
+
+   ret = kstrtos32(buf, 0, &mode);
+   if (ret)
+   return -EINVAL;
+
+   ret = pm_runtime_get_sync(ddev->dev);
+   if (ret < 0) {
+   pm_runtime_put_autosuspend(ddev->dev);
+   return ret;
+   }
+
+   ret = amdgpu_dpm_set_xgmi_plpd_mode(adev, mode);
+
+   pm_runtime_mark_last_busy(ddev->dev);
+   pm_runtime_put_autosuspend(ddev->dev);
+
+   if (ret)
+   return ret;
+
+   return count;
+}
+
 static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state,  
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_f

[PATCH 5/7] drm/amd/pm: init plpd_mode properly for different asics

2023-09-24 Thread Le Ma
Assign DEFAULT mode if the ASIC supports PLPD; otherwise keep NONE.

v2: reduce ip version checks

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 893359b26418..1c6b22638bf4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1130,6 +1130,21 @@ static void smu_swctf_delayed_work_handler(struct 
work_struct *work)
orderly_poweroff(true);
 }
 
+static void smu_init_xgmi_plpd_mode(struct smu_context *smu)
+{
+   if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 2)) {
+   smu->plpd_mode = XGMI_PLPD_DEFAULT;
+   return;
+   }
+
+   /* PMFW put PLPD into default policy after enabling the feature */
+   if (smu_feature_is_enabled(smu,
+  SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT))
+   smu->plpd_mode = XGMI_PLPD_DEFAULT;
+   else
+   smu->plpd_mode = XGMI_PLPD_NONE;
+}
+
 static int smu_sw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1361,6 +1376,8 @@ static int smu_smc_hw_setup(struct smu_context *smu)
return ret;
}
 
+   smu_init_xgmi_plpd_mode(smu);
+
ret = smu_feature_get_enabled_mask(smu, &features_supported);
if (ret) {
dev_err(adev->dev, "Failed to retrieve supported dpm 
features!\n");
-- 
2.38.1



[PATCH 3/7] drm/amd/pm: add xgmi plpd mode selecting interface for smu v13.0.6

2023-09-24 Thread Le Ma
Add the interface to change xgmi per-link power down policy.

v2: split from sysfs interface code and miscellaneous updates
v3: check against XGMI_PLPD_DEFAULT/XGMI_PLPD_OPTIMIZED and pass PPSMC param

Signed-off-by: Le Ma 
Reviewed-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 24 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  9 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 31 +++
 4 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index e6f1620acdd4..893359b26418 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3157,6 +3157,30 @@ static int smu_get_prv_buffer_details(void *handle, void 
**addr, size_t *size)
return 0;
 }
 
+int smu_set_xgmi_plpd_mode(struct smu_context *smu,
+  enum pp_xgmi_plpd_mode mode)
+{
+   int ret = -EOPNOTSUPP;
+
+   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+   return ret;
+
+   /* PLPD policy is not supported if it's NONE */
+   if (smu->plpd_mode == XGMI_PLPD_NONE)
+   return ret;
+
+   if (smu->plpd_mode == mode)
+   return 0;
+
+   if (smu->ppt_funcs && smu->ppt_funcs->select_xgmi_plpd_policy)
+   ret = smu->ppt_funcs->select_xgmi_plpd_policy(smu, mode);
+
+   if (!ret)
+   smu->plpd_mode = mode;
+
+   return ret;
+}
+
 static const struct amd_pm_funcs swsmu_pm_funcs = {
/* export for sysfs */
.set_fan_control_mode= smu_set_fan_control_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 5356b91c6292..e17169f681e8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -840,6 +840,12 @@ struct pptable_funcs {
 */
int (*allow_xgmi_power_down)(struct smu_context *smu, bool en);
 
+   /**
+* @select_xgmi_plpd_policy: Select xgmi per-link power down policy.
+*/
+   int (*select_xgmi_plpd_policy)(struct smu_context *smu,
+  enum pp_xgmi_plpd_mode mode);
+
/**
 * @update_pcie_parameters: Update and upload the system's PCIe
 *  capabilites to the SMU.
@@ -1487,6 +1493,9 @@ int smu_set_ac_dc(struct smu_context *smu);
 
 int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
 
+int smu_set_xgmi_plpd_mode(struct smu_context *smu,
+  enum pp_xgmi_plpd_mode mode);
+
 int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value);
 
 int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 7c300b4d95c7..4850e48bbef5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -252,7 +252,8 @@
__SMU_DUMMY_MAP(QueryValidMcaCount),\
__SMU_DUMMY_MAP(QueryValidMcaCeCount),  \
__SMU_DUMMY_MAP(McaBankDumpDW), \
-   __SMU_DUMMY_MAP(McaBankCeDumpDW),
+   __SMU_DUMMY_MAP(McaBankCeDumpDW),   \
+   __SMU_DUMMY_MAP(SelectPLPDMode),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 11a6cd96c601..b137c37903fc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -170,6 +170,7 @@ static const struct cmn2asic_msg_mapping 
smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(QueryValidMcaCeCount,
PPSMC_MSG_QueryValidMcaCeCount,0),
MSG_MAP(McaBankDumpDW,   PPSMC_MSG_McaBankDumpDW,   
0),
MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, 
0),
+   MSG_MAP(SelectPLPDMode,  PPSMC_MSG_SelectPLPDMode,  
0),
 };
 
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
@@ -2716,6 +2717,35 @@ static const struct amdgpu_mca_smu_funcs 
smu_v13_0_6_mca_smu_funcs = {
.mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array,
 };
 
+static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,
+  enum pp_xgmi_plpd_mode mode)
+{
+   struct amdgpu_device *adev = smu->adev;
+   int ret, param;
+
+   switch (mode) {
+   case XGMI_PLPD_DEFAULT:
+   param = PPSMC_PLPD_MODE_DEFAULT;
+   break;
+   case XGMI_PLPD_OPTIMIZED:
+   pa

[PATCH 2/7] drm/amd/pm: add plpd_mode in smu_context to indicate current mode

2023-09-24 Thread Le Ma
Add enum pp_xgmi_plpd_mode to describe PLPD policies.

v2: move the enum from amdgpu_smu.h to kgd_pp_interface.h

Signed-off-by: Le Ma 
Reviewed-by: Asad Kamal 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 8 
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h  | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5a889f733462..e0bb6d39f0c3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -265,6 +265,14 @@ enum pp_power_type
PP_PWR_TYPE_FAST,
 };
 
+enum pp_xgmi_plpd_mode {
+   XGMI_PLPD_NONE = -1,
+   XGMI_PLPD_DISALLOW,
+   XGMI_PLPD_DEFAULT,
+   XGMI_PLPD_OPTIMIZED,
+   XGMI_PLPD_COUNT,
+};
+
 #define PP_GROUP_MASK0xF000
 #define PP_GROUP_SHIFT   28
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 5a52098bcf16..5356b91c6292 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -563,6 +563,8 @@ struct smu_context {
u32 debug_resp_reg;
 
struct delayed_work swctf_delayed_work;
+
+   enum pp_xgmi_plpd_mode plpd_mode;
 };
 
 struct i2c_adapter;
-- 
2.38.1



[PATCH 1/7] drm/amd/pm: update pmfw headers for version 85.73.0

2023-09-24 Thread Le Ma
Add the message used to select the PLPD mode.

Signed-off-by: Le Ma 
Reviewed-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 021dcbe58473..509e3cd483fb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -90,6 +90,7 @@
 #define PPSMC_MSG_ClearMcaOnRead0x39
 #define PPSMC_MSG_QueryValidMcaCeCount  0x3A
 #define PPSMC_MSG_McaBankCeDumpDW   0x3B
+#define PPSMC_MSG_SelectPLPDMode0x40
 #define PPSMC_Message_Count 0x41
 
 //PPSMC Reset Types for driver msg argument
@@ -107,6 +108,10 @@
 #define PPSMC_XCD_THM_TYPE  0x3
 #define PPSMC_HBM_THM_TYPE  0x4
 
+//PLPD modes
+#define PPSMC_PLPD_MODE_DEFAULT 0x1
+#define PPSMC_PLPD_MODE_OPTIMIZED   0x2
+
 typedef uint32_t PPSMC_Result;
 typedef uint32_t PPSMC_MSG;
 
-- 
2.38.1



[PATCH 2/2] drm/amdgpu: update gc_info v2_1 from discovery

2023-08-16 Thread Le Ma
Several new fields are exposed in gc_info v2_1

Change-Id: Ib320e8ca70de8960634e1a22aaf2611ba7ebbaeb
Signed-off-by: Le Ma 
Reviewed-by: Shiwu Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
 drivers/gpu/drm/amd/include/discovery.h   | 30 +++
 3 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index f4cd43ce251b..6ffdfc5a7c0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1390,6 +1390,7 @@ union gc_info {
struct gc_info_v1_1 v1_1;
struct gc_info_v1_2 v1_2;
struct gc_info_v2_0 v2;
+   struct gc_info_v2_1 v2_1;
 };
 
 static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
@@ -1465,6 +1466,15 @@ static int amdgpu_discovery_get_gfx_info(struct 
amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = 
le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = 
le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+   if (gc_info->v2.header.version_minor == 1) {
+   adev->gfx.config.gc_num_tcp_per_sa = 
le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+   adev->gfx.config.gc_tcp_size_per_cu = 
le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+   adev->gfx.config.gc_num_sdp_interface = 
le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+   adev->gfx.config.gc_num_cu_per_sqc = 
le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+   adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = 
le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+   adev->gfx.config.gc_l1_data_cache_size_per_sqc = 
le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+   adev->gfx.config.gc_tcc_size = 
le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+   }
break;
default:
dev_err(adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index a4ff515ce896..395c1768b9fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -241,6 +241,9 @@ struct amdgpu_gfx_config {
uint32_t gc_gl1c_per_sa;
uint32_t gc_gl1c_size_per_instance;
uint32_t gc_gl2c_per_gpu;
+   uint32_t gc_tcp_size_per_cu;
+   uint32_t gc_num_cu_per_sqc;
+   uint32_t gc_tcc_size;
 };
 
 struct amdgpu_cu_info {
diff --git a/drivers/gpu/drm/amd/include/discovery.h 
b/drivers/gpu/drm/amd/include/discovery.h
index b9884e576f98..7a9d473d0917 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -280,6 +280,36 @@ struct gc_info_v2_0 {
uint32_t gc_num_packer_per_sc;
 };
 
+struct gc_info_v2_1 {
+   struct gpu_info_header header;
+
+   uint32_t gc_num_se;
+   uint32_t gc_num_cu_per_sh;
+   uint32_t gc_num_sh_per_se;
+   uint32_t gc_num_rb_per_se;
+   uint32_t gc_num_tccs;
+   uint32_t gc_num_gprs;
+   uint32_t gc_num_max_gs_thds;
+   uint32_t gc_gs_table_depth;
+   uint32_t gc_gsprim_buff_depth;
+   uint32_t gc_parameter_cache_depth;
+   uint32_t gc_double_offchip_lds_buffer;
+   uint32_t gc_wave_size;
+   uint32_t gc_max_waves_per_simd;
+   uint32_t gc_max_scratch_slots_per_cu;
+   uint32_t gc_lds_size;
+   uint32_t gc_num_sc_per_se;
+   uint32_t gc_num_packer_per_sc;
+   /* new for v2_1 */
+   uint32_t gc_num_tcp_per_sh;
+   uint32_t gc_tcp_size_per_cu;
+   uint32_t gc_num_sdp_interface;
+   uint32_t gc_num_cu_per_sqc;
+   uint32_t gc_instruction_cache_size_per_sqc;
+   uint32_t gc_scalar_data_cache_size_per_sqc;
+   uint32_t gc_tcc_size;
+};
+
 typedef struct harvest_info_header {
uint32_t signature; /* Table Signature */
uint32_t version;   /* Table Version */
-- 
2.38.1



[PATCH 1/2] drm/amdgpu: update mall info v2 from discovery

2023-08-16 Thread Le Ma
Mall info v2 is introduced in ip discovery

Change-Id: Ia2e49e7679c578065f85059a077fc08c9f84615c
Signed-off-by: Le Ma 
Reviewed-by: Shiwu Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +
 drivers/gpu/drm/amd/include/discovery.h   | 8 +++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 9d8d08daca57..f4cd43ce251b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1478,6 +1478,7 @@ static int amdgpu_discovery_get_gfx_info(struct 
amdgpu_device *adev)
 
 union mall_info {
struct mall_info_v1_0 v1;
+   struct mall_info_v2_0 v2;
 };
 
 static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
@@ -1518,6 +1519,10 @@ static int amdgpu_discovery_get_mall_info(struct 
amdgpu_device *adev)
adev->gmc.mall_size = mall_size;
adev->gmc.m_half_use = half_use;
break;
+   case 2:
+   mall_size_per_umc = 
le32_to_cpu(mall_info->v2.mall_size_per_umc);
+   adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+   break;
default:
dev_err(adev->dev,
"Unhandled MALL info table %d.%d\n",
diff --git a/drivers/gpu/drm/amd/include/discovery.h 
b/drivers/gpu/drm/amd/include/discovery.h
index f43e29722ef7..b9884e576f98 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -30,7 +30,7 @@
 #define GC_TABLE_ID 0x4347
 #define HARVEST_TABLE_SIGNATURE 0x56524148
 #define VCN_INFO_TABLE_ID   0x004E4356
-#define MALL_INFO_TABLE_ID  0x4D414C4C
+#define MALL_INFO_TABLE_ID  0x4C4C414D
 
 typedef enum
 {
@@ -312,6 +312,12 @@ struct mall_info_v1_0 {
uint32_t reserved[5];
 };
 
+struct mall_info_v2_0 {
+   struct mall_info_header header;
+   uint32_t mall_size_per_umc;
+   uint32_t reserved[8];
+};
+
 #define VCN_INFO_TABLE_MAX_NUM_INSTANCES 4
 
 struct vcn_info_header {
-- 
2.38.1



[PATCH 1/1] drm/amdgpu: remove duplicated doorbell range init for sdma v4.4.2

2023-06-26 Thread Le Ma
The SDMA doorbell range init is already handled in an earlier phase, so the
duplicated call here can be removed.

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 6be19ffc502b..f413898dda37 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -902,11 +902,6 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
*adev,
WREG32_SDMA(i, regSDMA_CNTL, temp);
 
if (!amdgpu_sriov_vf(adev)) {
-   ring = &adev->sdma.instance[i].ring;
-   adev->nbio.funcs->sdma_doorbell_range(adev, i,
-   ring->use_doorbell, ring->doorbell_index,
-   adev->doorbell_index.sdma_doorbell_range);
-
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
/* unhalt engine */
temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
-- 
2.38.1



[PATCH 1/1] drm/amdgpu/pm: notify driver unloading to PMFW for SMU v13.0.6 dGPU

2023-06-01 Thread Le Ma
As requested, follow the same sequence as on APUs and send only
PPSMC_MSG_PrepareForDriverUnload to PMFW during driver unloading.

Change-Id: I2dc8495572b0bce6e21eafb51b215c83d94ac647
Signed-off-by: Le Ma 
Reviewed-by: Shiwu Zhang 
Reviewed-by: Lijo Lazar 
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 3da614faf75d..392ccebc8dac 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1409,18 +1409,16 @@ static int smu_v13_0_6_system_features_control(struct 
smu_context *smu,
  bool enable)
 {
struct amdgpu_device *adev = smu->adev;
-   int ret;
-
-   /* On APUs, notify FW that the device is no longer driver managed */
-   if (adev->flags & AMD_IS_APU) {
-   if (!enable)
-   smu_v13_0_6_notify_unload(smu);
+   int ret = 0;
 
-   return 0;
+   if (enable) {
+   if (!(adev->flags & AMD_IS_APU))
+   ret = smu_v13_0_system_features_control(smu, enable);
+   } else {
+   /* Notify FW that the device is no longer driver managed */
+   smu_v13_0_6_notify_unload(smu);
}
 
-   ret = smu_v13_0_system_features_control(smu, enable);
-
return ret;
 }
 
-- 
2.38.1



[PATCH] drm/amdgpu: correct the memcpy size for ip discovery firmware

2022-09-29 Thread Le Ma
Use fw->size instead of discovery_tmr_size for fallback path.

Change-Id: I61f1ec55314ea5948ed3ef821becfdd63d876272
Signed-off-by: Le Ma 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 309d35026222..0b4f4d2f8d32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -234,7 +234,7 @@ static int amdgpu_discovery_read_binary_from_file(struct 
amdgpu_device *adev, ui
return r;
}
 
-   memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size);
+   memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
release_firmware(fw);
 
return 0;
-- 
2.17.1



[PATCH] drm/amdgpu: correct register access for RLC_JUMP_TABLE_RESTORE

2021-12-06 Thread Le Ma
From: Le Ma 

The register access should take the GC IP base address into account.

Signed-off-by: Le Ma 
Signed-off-by: Hawking Zhang 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b305fd39874f..edb3e3b08eed 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3070,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
  AMD_PG_SUPPORT_CP |
  AMD_PG_SUPPORT_GDS |
  AMD_PG_SUPPORT_RLC_SMU_HS)) {
-   WREG32(mmRLC_JUMP_TABLE_RESTORE,
-  adev->gfx.rlc.cp_table_gpu_addr >> 8);
+   WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
+adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v9_0_init_gfx_power_gating(adev);
}
 }
-- 
2.17.1



[PATCH 1/1] drm/amdgpu: fix ctx init failure for asics without gfx ring

2019-12-19 Thread Le Ma
This workaround does not affect other asics because amdgpu only needs to expose
one gfx sched to the user for now.

Change-Id: Ica92b8565a89899aebe0eba7b2b5a25159b411d3
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 63f6365..64e2bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -127,7 +127,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
switch (i) {
case AMDGPU_HW_IP_GFX:
-   scheds = adev->gfx.gfx_sched;
+   sched = &adev->gfx.gfx_ring[0].sched;
+   scheds = &sched;
num_scheds = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 07/10 v2] drm/amdgpu: add concurrent baco reset support for XGMI

2019-11-27 Thread Le Ma
Currently each XGMI node reset wq does not run in parallel, because the same
work item bound to the same cpu runs in sequence. So bind the xgmi_reset_work
item to different cpus instead.

XGMI requires all nodes to enter baco within very close proximity before
any node exits baco. So schedule the xgmi_reset_work wq twice, for baco enter
and baco exit respectively.

To use baco for XGMI, a PMFW that supports baco on XGMI is required.

The case where PSP reset and baco reset coexist within an XGMI hive never
occurs and is not taken into consideration.

v2: define use_baco flag to simplify the code for xgmi baco sequence

Change-Id: I9c08cf90134f940b42e20d2129ff87fba761c532
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 82 +-
 2 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d120fe5..08929e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -998,6 +998,8 @@ struct amdgpu_device {
int pstate;
/* enable runtime pm on the device */
boolrunpm;
+
+   boolin_baco;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bd387bb..5367134 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2654,7 +2654,13 @@ static void amdgpu_device_xgmi_reset_func(struct 
work_struct *__work)
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
 
-   adev->asic_reset_res =  amdgpu_asic_reset(adev);
+   if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+   adev->asic_reset_res = (adev->in_baco == false) ?
+   amdgpu_device_baco_enter(adev->ddev) :
+   amdgpu_device_baco_exit(adev->ddev);
+   else
+   adev->asic_reset_res = amdgpu_asic_reset(adev);
+
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 adev->asic_reset_res, adev->ddev->unique);
@@ -3789,13 +3795,18 @@ static int amdgpu_device_pre_asic_reset(struct 
amdgpu_device *adev,
return r;
 }
 
-static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
+static int amdgpu_do_asic_reset(struct amdgpu_device *adev,
+  struct amdgpu_hive_info *hive,
   struct list_head *device_list_handle,
   bool *need_full_reset_arg)
 {
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
int r = 0;
+   int cpu = smp_processor_id();
+   bool use_baco =
+   (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+   true : false;
 
/*
 * ASIC reset has to be done on all HGMI hive nodes ASAP
@@ -3803,21 +3814,24 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 */
if (need_full_reset) {
list_for_each_entry(tmp_adev, device_list_handle, 
gmc.xgmi.head) {
-   /* For XGMI run all resets in parallel to speed up the 
process */
+   /*
+* For XGMI run all resets in parallel to speed up the
+* process by scheduling the highpri wq on different
+* cpus. For XGMI with baco reset, all nodes must enter
+* baco within close proximity before anyone exit.
+*/
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
-   if (!queue_work(system_highpri_wq, 
&tmp_adev->xgmi_reset_work))
+   if (!queue_work_on(cpu, system_highpri_wq,
+  &tmp_adev->xgmi_reset_work))
r = -EALREADY;
+   cpu = cpumask_next(cpu, cpu_online_mask);
} else
r = amdgpu_asic_reset(tmp_adev);
-
-   if (r) {
-   DRM_ERROR("ASIC reset failed with error, %d for 
drm dev, %s",
-r, tmp_adev->ddev->unique);
+   if (r)
break;
-   }
}
 
-   /* For XGMI wait for all PSP resets to complete before proceed 
*/
+   /* For

[PATCH 06/10 v2] drm/amdgpu: add condition to enable baco for ras recovery

2019-11-27 Thread Le Ma
Switch to baco reset method for ras recovery if baco-supported PMFW ready.
If not, keep the original reset method.

Change-Id: I07c3e6862be03e068745c73db8ea71f428ecba6b
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 951327f..e8c5a00 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -557,7 +557,8 @@ static int soc15_mode2_reset(struct amdgpu_device *adev)
 static enum amd_reset_method
 soc15_asic_reset_method(struct amdgpu_device *adev)
 {
-   bool baco_reset;
+   bool baco_reset = false;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -571,18 +572,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
case CHIP_VEGA20:
if (adev->psp.sos_fw_version >= 0x80067)
soc15_asic_get_baco_capability(adev, &baco_reset);
-   else
-   baco_reset = false;
-   if (baco_reset) {
-   struct amdgpu_hive_info *hive = 
amdgpu_get_xgmi_hive(adev, 0);
-   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-   if (hive || (ras && ras->supported))
-   baco_reset = false;
-   }
+   /*
+* 1. PMFW version > 0x284300: all cases use baco
+* 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+*/
+   if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+   baco_reset = false;
break;
default:
-   baco_reset = false;
break;
}
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 10/10 v3] drm/amdgpu: reduce redundant uvd context lost warning message

2019-11-27 Thread Le Ma
Move the print out of uvd instance loop in amdgpu_uvd_suspend

v2: drop unnecessary brackets
v3: read the ras_intr state once and reuse it in multiple places

Change-Id: Ifad997debd84763e1b55d668e144b729598f115e
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e324bfe..d587ffe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -349,6 +349,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
unsigned size;
void *ptr;
int i, j;
+   bool in_ras_intr = amdgpu_ras_intr_triggered();
 
cancel_delayed_work_sync(&adev->uvd.idle_work);
 
@@ -376,13 +377,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
return -ENOMEM;
 
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
-   if (amdgpu_ras_intr_triggered()) {
-   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+   if (in_ras_intr)
memset(adev->uvd.inst[j].saved_bo, 0, size);
-   } else {
+   else
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
-   }
}
+
+   if (in_ras_intr)
+   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+
return 0;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 10/10 v2] drm/amdgpu: reduce redundant uvd context lost warning message

2019-11-27 Thread Le Ma
Move the print out of uvd instance loop in amdgpu_uvd_suspend

v2: drop unnecessary brackets

Change-Id: Ifad997debd84763e1b55d668e144b729598f115e
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e324bfe..69248ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -376,13 +376,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
return -ENOMEM;
 
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
-   if (amdgpu_ras_intr_triggered()) {
-   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+   if (amdgpu_ras_intr_triggered())
memset(adev->uvd.inst[j].saved_bo, 0, size);
-   } else {
+   else
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
-   }
}
+
+   if (amdgpu_ras_intr_triggered())
+   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+
return 0;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 08/10] drm/amdgpu: support full gpu reset workflow when ras err_event_athub occurs

2019-11-27 Thread Le Ma
This athub fatal error can be recovered by baco without a system-level reboot,
so add a mode to use baco for the recovery. This does not affect the default
psp reset cases for now.

Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 71abfe9..53e9590 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4021,12 +4021,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
bool in_ras_intr = amdgpu_ras_intr_triggered();
+   bool use_baco =
+   (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+   true : false;
 
/*
 * Flush RAM to disk so that after reboot
 * the user can read log and see why the system rebooted.
 */
-   if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
+   if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
 
DRM_WARN("Emergency reboot.");
 
@@ -4037,7 +4040,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
 
-   dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs 
stop":"reset");
+   dev_info(adev->dev, "GPU %s begin!\n",
+   (in_ras_intr && !use_baco) ? "jobs stop":"reset");
 
cancel_delayed_work_sync(&adev->delayed_init_work);
 
@@ -4104,7 +4108,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_unregister_gpu_instance(tmp_adev);
 
/* disable ras on ALL IPs */
-   if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
+   if (!(in_ras_intr && !use_baco) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
 
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -4115,13 +4120,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
*adev,
 
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
 
-   if (in_ras_intr)
+   if (in_ras_intr && !use_baco)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
 
 
-   if (in_ras_intr)
+   if (in_ras_intr && !use_baco)
goto skip_sched_resume;
 
/*
@@ -4214,7 +4219,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
-   if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
+   if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 07/10] drm/amdgpu: add concurrent baco reset support for XGMI

2019-11-27 Thread Le Ma
Currently each XGMI node reset wq does not run in parallel, because the same
work item bound to the same cpu runs in sequence. So bind the xgmi_reset_work
item to different cpus instead.

XGMI requires all nodes to enter baco within very close proximity before
any node exits baco. So schedule the xgmi_reset_work wq twice, for baco enter
and baco exit respectively.

The default reset code path and methods do not change for vega20 production:
  - baco reset without xgmi/ras
  - psp reset with xgmi/ras

To enable baco for XGMI/RAS case, both 2 conditions below are needed:
  - amdgpu_ras_enable=2
  - baco-supported smu firmware

The case where PSP reset and baco reset coexist within an XGMI hive is not
taken into consideration.

Change-Id: I9c08cf90134f940b42e20d2129ff87fba761c532
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 78 ++
 2 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d120fe5..08929e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -998,6 +998,8 @@ struct amdgpu_device {
int pstate;
/* enable runtime pm on the device */
boolrunpm;
+
+   boolin_baco;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bd387bb..71abfe9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2654,7 +2654,13 @@ static void amdgpu_device_xgmi_reset_func(struct 
work_struct *__work)
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
 
-   adev->asic_reset_res =  amdgpu_asic_reset(adev);
+   if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+   adev->asic_reset_res = (adev->in_baco == false) ?
+   amdgpu_device_baco_enter(adev->ddev) :
+   amdgpu_device_baco_exit(adev->ddev);
+   else
+   adev->asic_reset_res = amdgpu_asic_reset(adev);
+
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 adev->asic_reset_res, adev->ddev->unique);
@@ -3796,6 +3802,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
int r = 0;
+   int cpu = smp_processor_id();
 
/*
 * ASIC reset has to be done on all HGMI hive nodes ASAP
@@ -3803,21 +3810,24 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 */
if (need_full_reset) {
list_for_each_entry(tmp_adev, device_list_handle, 
gmc.xgmi.head) {
-   /* For XGMI run all resets in parallel to speed up the 
process */
+   /*
+* For XGMI run all resets in parallel to speed up the
+* process by scheduling the highpri wq on different
+* cpus. For XGMI with baco reset, all nodes must enter
+* baco within close proximity before anyone exit.
+*/
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
-   if (!queue_work(system_highpri_wq, 
&tmp_adev->xgmi_reset_work))
+   if (!queue_work_on(cpu, system_highpri_wq,
+  &tmp_adev->xgmi_reset_work))
r = -EALREADY;
+   cpu = cpumask_next(cpu, cpu_online_mask);
} else
r = amdgpu_asic_reset(tmp_adev);
-
-   if (r) {
-   DRM_ERROR("ASIC reset failed with error, %d for 
drm dev, %s",
-r, tmp_adev->ddev->unique);
+   if (r)
break;
-   }
}
 
-   /* For XGMI wait for all PSP resets to complete before proceed 
*/
+   /* For XGMI wait for all work to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev, device_list_handle,
gmc.xgmi.head) {
@@ -3826,11 +3836,59 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
r = tmp_adev->asic_reset_res;
 

[PATCH 02/10] drm/amdgpu: export amdgpu_ras_find_obj to use externally

2019-11-27 Thread Le Ma
Change it to external interface.

Change-Id: I2ab61f149c84a05a6f883a4c7415ea8012ec03a6
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1593564..04394c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -198,9 +198,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file 
*f,
return 0;
 }
 
-static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
-   struct ras_common_if *head);
-
 /**
  * DOC: AMDGPU RAS debugfs control interface
  *
@@ -445,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct 
amdgpu_device *adev,
 }
 
 /* return an obj equal to head, or the first when head is NULL */
-static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_common_if *head)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index f80fd34..a2c1ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -611,6 +611,9 @@ int amdgpu_ras_interrupt_remove_handler(struct 
amdgpu_device *adev,
 int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info);
 
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+   struct ras_common_if *head);
+
 extern atomic_t amdgpu_ras_in_intr;
 
 static inline bool amdgpu_ras_intr_triggered(void)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 09/10] drm/amdgpu: clear err_event_athub flag after reset exit

2019-11-27 Thread Le Ma
Otherwise the next err_event_athub error cannot trigger a gpu reset. Also, the
following resume sequence will not be affected by this flag.

v2: create function to clear amdgpu_ras_in_intr for modularity of ras driver

Change-Id: I5cd293f30f23876bf2a1860681bcb50f47713ecd
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 5 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 53e9590..8387b44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3890,6 +3890,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
}
}
 
+   if (!r && amdgpu_ras_intr_triggered())
+   amdgpu_ras_intr_cleared();
+
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
/* post card */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index a2c1ac1..d4ade47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -621,6 +621,11 @@ static inline bool amdgpu_ras_intr_triggered(void)
return !!atomic_read(&amdgpu_ras_in_intr);
 }
 
+static inline void amdgpu_ras_intr_cleared(void)
+{
+   atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
 
 #endif
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 05/10] drm/amdgpu: enable/disable doorbell interrupt in baco entry/exit helper

2019-11-27 Thread Le Ma
This operation is needed on baco entry/exit for ras recovery.

Change-Id: I535c7231693f3138a8e3d5acd55672e2ac68232f
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b1408c5..bd387bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4308,10 +4308,14 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev)
 int amdgpu_device_baco_enter(struct drm_device *dev)
 {
struct amdgpu_device *adev = dev->dev_private;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
if (!amdgpu_device_supports_baco(adev->ddev))
return -ENOTSUPP;
 
+   if (ras && ras->supported)
+   adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
if (is_support_sw_smu(adev)) {
struct smu_context *smu = &adev->smu;
int ret;
@@ -4319,8 +4323,6 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
ret = smu_baco_enter(smu);
if (ret)
return ret;
-
-   return 0;
} else {
void *pp_handle = adev->powerplay.pp_handle;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
@@ -4331,14 +4333,15 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
/* enter BACO state */
if (pp_funcs->set_asic_baco_state(pp_handle, 1))
return -EIO;
-
-   return 0;
}
+
+   return 0;
 }
 
 int amdgpu_device_baco_exit(struct drm_device *dev)
 {
struct amdgpu_device *adev = dev->dev_private;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
if (!amdgpu_device_supports_baco(adev->ddev))
return -ENOTSUPP;
@@ -4351,7 +4354,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
if (ret)
return ret;
 
-   return 0;
} else {
void *pp_handle = adev->powerplay.pp_handle;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
@@ -4362,7 +4364,10 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
/* exit BACO state */
if (pp_funcs->set_asic_baco_state(pp_handle, 0))
return -EIO;
-
-   return 0;
}
+
+   if (ras && ras->supported)
+   adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+   return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 01/10] drm/amdgpu: remove ras global recovery handling from ras_controller_int handler

2019-11-27 Thread Le Ma
From: Le Ma 

v2: add notification when ras controller interrupt generates

Change-Id: Ic03e42e9d1c4dab1fa7f4817c191a16e485b48a9
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 0db458f..25231d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -324,7 +324,12 @@ static void 
nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
RAS_CNTLR_INTERRUPT_CLEAR, 1);
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
bif_doorbell_intr_cntl);
 
-   amdgpu_ras_global_ras_isr(adev);
+   DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
+
+   /* ras_controller_int is dedicated for nbif ras error,
+* not the global interrupt for sync flood
+*/
+   amdgpu_ras_reset_gpu(adev, true);
}
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 10/10] drm/amdgpu: reduce redundant uvd context lost warning message

2019-11-27 Thread Le Ma
Move the print out of uvd instance loop in amdgpu_uvd_suspend

Change-Id: Ifad997debd84763e1b55d668e144b729598f115e
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e324bfe..ac7c7795 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -377,12 +377,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
if (amdgpu_ras_intr_triggered()) {
-   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
memset(adev->uvd.inst[j].saved_bo, 0, size);
} else {
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
}
}
+
+   if (amdgpu_ras_intr_triggered()) {
+   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+
return 0;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 06/10] drm/amdgpu: add condition to enable baco for xgmi/ras case

2019-11-27 Thread Le Ma
Avoid changing the default reset behavior for production cards by checking
that amdgpu_ras_enable is equal to 2. Also, only a new enough smu ucode can
support baco for the xgmi/ras case.

Change-Id: I07c3e6862be03e068745c73db8ea71f428ecba6b
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 951327f..6202333 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -577,7 +577,9 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
struct amdgpu_hive_info *hive = 
amdgpu_get_xgmi_hive(adev, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-   if (hive || (ras && ras->supported))
+   if ((hive || (ras && ras->supported)) &&
+   (amdgpu_ras_enable != 2 ||
+   adev->pm.fw_version <= 0x283400))
baco_reset = false;
}
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 03/10] drm/amdgpu: clear ras controller status registers when interrupt occurs

2019-11-27 Thread Le Ma
Fix the issue that the ras controller interrupt cannot be triggered anymore
after an nbif uncorrectable error has occurred once. The error count is also
stored in the nbif ras object for query.

Change-Id: Iba482c169fdff3e9c390072c0289a622a522133c
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 25231d6..9a3a65a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -52,6 +52,9 @@
 #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK  0x0FFCL
 #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK0x001FL
 
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+   void *ras_error_status);
+
 static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
 {
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -314,6 +317,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device 
*adev)
 static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct 
amdgpu_device *adev)
 {
uint32_t bif_doorbell_intr_cntl;
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
 
bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
if (REG_GET_FIELD(bif_doorbell_intr_cntl,
@@ -324,6 +328,12 @@ static void 
nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
RAS_CNTLR_INTERRUPT_CLEAR, 1);
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
bif_doorbell_intr_cntl);
 
+   /*
+* clear error status after ras_controller_intr according to
+* hw team and count ue number for query
+*/
+   nbio_v7_4_query_ras_error_count(adev, &obj->err_data);
+
DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
 
/* ras_controller_int is dedicated for nbif ras error,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/1] drm/amdgpu: fix no ACK from LDS read during stress test for Arcturus

2019-10-30 Thread Le Ma
Set mmSQ_CONFIG.DISABLE_SMEM_SOFT_CLAUSE as W/R.

Change-Id: I6225909fd62702427fbb807e0c6ba6bafcfa41d5
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5e7a01c..07962ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -699,6 +699,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_1_arct[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fff, 
0x2ebd9fe3),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fff, 
0xb90f5b1),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x, 0x011A),
 };
 
 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/1] drm/amdgpu: add missing amdgpu_ras.h header include

2019-10-29 Thread Le Ma
Fix compilation error.

Change-Id: I461c558778f9a52378269324dc41b8d639f3ccbe
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fce206f..bbe9ac7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -34,6 +34,8 @@
 #include "psp_v11_0.h"
 #include "psp_v12_0.h"
 
+#include "amdgpu_ras.h"
+
 static void psp_set_funcs(struct amdgpu_device *adev);
 
 static int psp_early_init(void *handle)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/4] drm/amdgpu: remove ras global recovery handling from ras_controller_int handler

2019-10-28 Thread Le Ma
From: Le Ma 

Change-Id: Ia8a61a4b3bd529f0f691e43e69b299d7d151c0c2
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 0db458f..876690a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -324,7 +324,11 @@ static void 
nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
RAS_CNTLR_INTERRUPT_CLEAR, 1);
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
bif_doorbell_intr_cntl);
 
-   amdgpu_ras_global_ras_isr(adev);
+   /*
+* ras_controller_int is dedicated for nbif ras error,
+* not the global interrupt for sync flood
+*/
+   amdgpu_ras_reset_gpu(adev, true);
}
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/4] drm/amdgpu: clear UVD VCPU buffer when err_event_athub generated

2019-10-28 Thread Le Ma
The err_event_athub error will mess up the buffer and cause UVD resume hang.

Change-Id: If17a2161fb9b1b52eac08de00d2e935191bdbf99
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2c364b..b4dd89a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -39,6 +39,8 @@
 #include "cikd.h"
 #include "uvd/uvd_4_2_d.h"
 
+#include "amdgpu_ras.h"
+
 /* 1 second timeout */
 #define UVD_IDLE_TIMEOUT   msecs_to_jiffies(1000)
 
@@ -372,7 +374,13 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
 
-   memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+   /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+   if (amdgpu_ras_intr_triggered()) {
+   DRM_WARN("UVD VCPU state may lost due to RAS 
ERREVENT_ATHUB_INTERRUPT\n");
+   memset(adev->uvd.inst[j].saved_bo, 0, size);
+   } else {
+   memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+   }
}
return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/4] drm/amdgpu: bypass some cleanup work after err_event_athub

2019-10-28 Thread Le Ma
PSP loses connection when err_event_athub occurs. This cleanup work can be
skipped in BACO reset.

Change-Id: If54a3735edd6ccbb58d40a5f8833392981f8ce37
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c|  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 20 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 --
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 51d74bb..72d9892 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2274,6 +2274,12 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
+   /* PSP lost connection when err_event_athub occurs */
+   if (amdgpu_ras_intr_triggered() &&
+   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+   adev->ip_blocks[i].status.hw = false;
+   continue;
+   }
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fd7a73f..fce206f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -167,6 +167,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
+   /*
+* Shouldn't wait for timeout when err_event_athub occurs,
+* because gpu reset thread triggered and lock resource should
+* be released for psp resume sequence.
+*/
+   if (amdgpu_ras_intr_triggered())
+   break;
msleep(1);
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 796326b..dab90c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -558,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0;
 
-   ret = psp_ras_enable_features(&adev->psp, &info, enable);
-   if (ret) {
-   DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
-   enable ? "enable":"disable",
-   ras_block_str(head->block),
-   ret);
-   if (ret == TA_RAS_STATUS__RESET_NEEDED)
-   return -EAGAIN;
-   return -EINVAL;
+   if (!amdgpu_ras_intr_triggered()) {
+   ret = psp_ras_enable_features(&adev->psp, &info, enable);
+   if (ret) {
+   DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
+   enable ? "enable":"disable",
+   ras_block_str(head->block),
+   ret);
+   if (ret == TA_RAS_STATUS__RESET_NEEDED)
+   return -EAGAIN;
+   return -EINVAL;
+   }
}
 
/* setup the obj */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9fe95e7..9c2dba62 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3736,8 +3736,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
-   /* disable KCQ to avoid CPC touch memory not valid anymore */
-   gfx_v9_0_kcq_disable(adev);
+   /* DF freeze and kcq disable will fail */
+   if (!amdgpu_ras_intr_triggered())
+   /* disable KCQ to avoid CPC touch memory not valid anymore */
+   gfx_v9_0_kcq_disable(adev);
 
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/4] drm/amdgpu: reset err_event_athub flag if gpu recovery succeeded

2019-10-28 Thread Le Ma
Otherwise the next err_event_athub error cannot trigger a gpu reset.

Change-Id: I5cd293f30f23876bf2a1860681bcb50f47713ecd
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..51d74bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4089,6 +4089,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
}
 
+   if (!r && in_ras_intr)
+   atomic_set(&amdgpu_ras_in_intr, 0);
+
 skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/4] drm/amdgpu/soc15: disable doorbell interrupt as part of BACO entry sequence

2019-10-11 Thread Le Ma
Workaround to make RAS recovery work in BACO reset.

Change-Id: I4e4a81f719dcc88dfd49f583c4be3a373b5eab2c
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 ++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 8 
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 9 +
 3 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 1f26a17..919bd56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -67,6 +67,8 @@ struct amdgpu_nbio_funcs {
  bool enable);
void (*ih_doorbell_range)(struct amdgpu_device *adev,
  bool use_doorbell, int doorbell_index);
+   void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+ bool enable);
void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
 bool enable);
void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 238c248..0db458f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -502,6 +502,13 @@ static void nbio_v7_4_query_ras_error_count(struct 
amdgpu_device *adev,
}
 }
 
+static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
+   bool enable)
+{
+   WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+  DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+}
+
 const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
@@ -516,6 +523,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = 
nbio_v7_4_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+   .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt,
.update_medium_grain_clock_gating = 
nbio_v7_4_update_medium_grain_clock_gating,
.update_medium_grain_light_sleep = 
nbio_v7_4_update_medium_grain_light_sleep,
.get_clockgating_state = nbio_v7_4_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fc6cfbc..5cf5f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -493,10 +493,15 @@ static int soc15_asic_baco_reset(struct amdgpu_device 
*adev)
 {
void *pp_handle = adev->powerplay.pp_handle;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
if (!pp_funcs ||!pp_funcs->get_asic_baco_state 
||!pp_funcs->set_asic_baco_state)
return -ENOENT;
 
+   /* avoid NBIF got stuck when do RAS recovery in BACO reset */
+   if (ras && ras->supported)
+   adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
/* enter BACO state */
if (pp_funcs->set_asic_baco_state(pp_handle, 1))
return -EIO;
@@ -505,6 +510,10 @@ static int soc15_asic_baco_reset(struct amdgpu_device 
*adev)
if (pp_funcs->set_asic_baco_state(pp_handle, 0))
return -EIO;
 
+   /* re-enable doorbell interrupt after BACO exit */
+   if (ras && ras->supported)
+   adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
dev_info(adev->dev, "GPU BACO reset\n");
 
adev->in_baco_reset = 1;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/4] drm/amd/powerplay: send EnterBaco msg with argument as RAS recovery flag

2019-10-11 Thread Le Ma
1 indicates RAS recovery flag in SMU FW.

Change-Id: Icb8c14586fca1b8ae443bbde764570a9e41850fa
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
index b068d1c..9b5e72b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
@@ -89,10 +89,15 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum 
BACO_STATE state)
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
data |= 0x8000;
WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
-   }
 
-   if(smum_send_msg_to_smc_with_parameter(hwmgr, 
PPSMC_MSG_EnterBaco, 0))
-   return -EINVAL;
+   if(smum_send_msg_to_smc_with_parameter(hwmgr,
+   PPSMC_MSG_EnterBaco, 0))
+   return -EINVAL;
+   } else {
+   if(smum_send_msg_to_smc_with_parameter(hwmgr,
+   PPSMC_MSG_EnterBaco, 1))
+   return -EINVAL;
+   }
 
} else if (state == BACO_STATE_OUT) {
if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco))
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/4] drm/amd/powerplay: avoid disabling ECC if RAS is enabled for VEGA20

2019-10-11 Thread Le Ma
Programming THM_BACO_CNTL.SOC_DOMAIN_IDLE=1 tells VBIOS to disable ECC on
BACO exit. This can save the BACO exit time spent by PSP on non-ECC SKUs.
Drop the setting for ECC-supported SKUs.

Change-Id: I2a82c128fa5e9731b886dd61f1273dc48ea1923c
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
index df6ff92..b068d1c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
@@ -29,7 +29,7 @@
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
 
-
+#include "amdgpu_ras.h"
 
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =
 {
@@ -74,6 +74,7 @@ int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum 
BACO_STATE *state)
 int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
enum BACO_STATE cur_state;
uint32_t data;
 
@@ -84,10 +85,11 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum 
BACO_STATE state)
return 0;
 
if (state == BACO_STATE_IN) {
-   data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
-   data |= 0x8000;
-   WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
-
+   if (!ras || !ras->supported) {
+   data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+   data |= 0x8000;
+   WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+   }
 
if(smum_send_msg_to_smc_with_parameter(hwmgr, 
PPSMC_MSG_EnterBaco, 0))
return -EINVAL;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/4] drm/amd/powerplay: add BACO platformCaps for VEGA20

2019-10-11 Thread Le Ma
BACO reset is needed for RAS recovery.

Change-Id: I8207fc314744468c89ba4a030cb2bb15b082aac7
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 6629c475..3d3c647 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -183,6 +183,9 @@ static int vega20_set_features_platform_caps(struct 
pp_hwmgr *hwmgr)
PHM_PlatformCaps_TablelessHardwareInterface);
 
phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+   PHM_PlatformCaps_BACO);
+
+   phm_cap_set(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_EnableSMU7ThermalManagement);
 
if (adev->pg_flags & AMD_PG_SUPPORT_UVD)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: correct condition check for psp rlc autoload

2019-09-23 Thread Le Ma
Change-Id: Ia91d0fb7179f6944214e892f370d7ef3d6b7d30e
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d359f1d..2aa1ae6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1080,7 +1080,8 @@ static int psp_np_fw_load(struct psp_context *psp)
return ret;
 
/* Start rlc autoload after psp recieved all the gfx firmware */
-   if (ucode->ucode_id == 
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+   if (psp->autoload_supported && ucode->ucode_id ==
+   AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: add command id in psp response failure message

2019-09-23 Thread Le Ma
From: Hawking Zhang 

Change-Id: I88649fc5dbc7376f3c90ec2114236294ca9189de
Signed-off-by: Hawking Zhang 
Reviewed-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f90a0cd..d359f1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -168,8 +168,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
if (ucode)
DRM_WARN("failed to load ucode id (%d) ",
  ucode->ucode_id);
-   DRM_WARN("psp command failed and response status is (0x%X)\n",
- psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);
+   DRM_WARN("psp command (0x%X) failed and response status is 
(0x%X)\n",
+psp->cmd_buf_mem->cmd_id,
+psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);
if (!timeout) {
mutex_unlock(&psp->mutex);
return -EINVAL;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu: add psp ip block for Arcturus

2019-09-22 Thread Le Ma
Change-Id: I6b69bfba66aa12d5486527e29a7c322336c95dd5
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 5bec851..dbd790e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -755,6 +755,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+   if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+   amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu: disable vcn ip block for front door loading on Arcturus

2019-09-22 Thread Le Ma
Change-Id: Ibf137cd57659e70516bcbbe456a00ad77e60647c
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 7c7e9f5..5bec851 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -760,7 +760,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-   amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+   if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT))
+   amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
break;
case CHIP_RENOIR:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu: enable psp front door loading by default on Arcturus

2019-09-22 Thread Le Ma
Change-Id: I13a5f590d5a49655965a13eb7ce773d1efffcbd0
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 82f6b41..fce1f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -360,6 +360,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type)
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
+   case CHIP_ARCTURUS:
case CHIP_RENOIR:
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -368,8 +369,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
-   case CHIP_ARCTURUS:
-   return AMDGPU_FW_LOAD_DIRECT;
 
default:
DRM_ERROR("Unknown firmware load type\n");
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/4] drm/amdgpu: increase CGCG gfx idle threshold for Arcturus

2019-08-09 Thread Le Ma
Follow the hw spec; there is no need to consider gfxoff on Arcturus.

Change-Id: Ib9cad79b1b9c096014447fc0a7d29cdb594e15e3
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 78150ff..9b85a73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4677,8 +4677,12 @@ static void 
gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* enable cgcg FSM(0x363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 
-   data = (0x36 << 
RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-   RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+   if (adev->asic_type == CHIP_ARCTURUS)
+   data = (0x2000 << 
RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+   RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+   else
+   data = (0x36 << 
RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+   RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << 
RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/1] drm/amdgpu/powerplay: update Arcturus smu version in new place

2019-08-09 Thread Le Ma
Follow patch below:
drm/amd/powerplay: re-define smu interface version for smu v11

Change-Id: Id78651209adc7a094f4c19ba965dcded37dd3ba7
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 1 -
 drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h | 2 +-
 drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h| 1 +
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c| 3 +++
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c 
b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index cff3777..e6fcbdf 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -1918,6 +1918,5 @@ void arcturus_set_ppt_funcs(struct smu_context *smu)
struct smu_table_context *smu_table = &smu->smu_table;
 
smu->ppt_funcs = &arcturus_ppt_funcs;
-   smu->smc_if_version = SMU11_DRIVER_IF_VERSION;
smu_table->table_count = TABLE_COUNT;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h 
b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
index c7a7953..b99e98c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define SMU11_DRIVER_IF_VERSION 0x08
+//#define SMU11_DRIVER_IF_VERSION 0x08
 
 #define PPTABLE_ARCTURUS_SMU_VERSION 4
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h 
b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index ee8542d..acbb83d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -27,6 +27,7 @@
 
 #define SMU11_DRIVER_IF_VERSION_INV 0x
 #define SMU11_DRIVER_IF_VERSION_VG20 0x13
+#define SMU11_DRIVER_IF_VERSION_ARCT 0x08
 #define SMU11_DRIVER_IF_VERSION_NV10 0x33
 #define SMU11_DRIVER_IF_VERSION_NV14 0x34
 
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 91dfae1..3b8e58e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -275,6 +275,9 @@ static int smu_v11_0_check_fw_version(struct smu_context 
*smu)
case CHIP_VEGA20:
smu->smc_if_version = SMU11_DRIVER_IF_VERSION_VG20;
break;
+   case CHIP_ARCTURUS:
+   smu->smc_if_version = SMU11_DRIVER_IF_VERSION_ARCT;
+   break;
case CHIP_NAVI10:
smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV10;
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/4] drm/amdgpu: enable mmhub clock gating for Arcturus

2019-08-09 Thread Le Ma
Init MC_MGCG/LS flag. Also apply to athub CG.

Change-Id: Ic00cb8e6d69eb75dd32f34f778352cee93063ee0
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 1 -
 drivers/gpu/drm/amd/amdgpu/soc15.c  | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index e52e4d1..0cf7ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -615,7 +615,6 @@ int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
return 0;
 }
 
-/* TODO: get 2 mmhub instances CG state */
 void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
 {
int data, data1;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index aecba1c..235cb5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1126,7 +1126,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
-   AMD_CG_SUPPORT_SDMA_LS;
+   AMD_CG_SUPPORT_SDMA_LS |
+   AMD_CG_SUPPORT_MC_MGCG |
+   AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x32;
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/4] drm/amdgpu: add GFX_CP_LS flag to Arcturus

2019-08-09 Thread Le Ma
AMD_CG_SUPPORT_GFX_CP_LS was accidentally missed in the earlier patch
"drm/amdgpu: enable gfx clock gating for Arcturus".

Change-Id: I9d70319dd07f7d642416cb260f9f5b3342b6f3f2
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 261493a..aecba1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1122,6 +1122,7 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
+   AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/4] drm/amdgpu: add mmhub clock gating for Arcturus

2019-08-09 Thread Le Ma
Add 2 mmhub instances CG

Change-Id: I76ab7a50cd9a40de3022f733787b42e4e5c4dbf5
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  12 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 126 
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h |   3 +
 3 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index cccb6e9..44ac122 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1465,9 +1465,9 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
if (adev->asic_type == CHIP_ARCTURUS)
-   return 0;
-
-   mmhub_v1_0_set_clockgating(adev, state);
+   mmhub_v9_4_set_clockgating(adev, state);
+   else
+   mmhub_v1_0_set_clockgating(adev, state);
 
athub_v1_0_set_clockgating(adev, state);
 
@@ -1479,9 +1479,9 @@ static void gmc_v9_0_get_clockgating_state(void *handle, 
u32 *flags)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
if (adev->asic_type == CHIP_ARCTURUS)
-   return;
-
-   mmhub_v1_0_get_clockgating(adev, flags);
+   mmhub_v9_4_get_clockgating(adev, flags);
+   else
+   mmhub_v1_0_get_clockgating(adev, flags);
 
athub_v1_0_get_clockgating(adev, flags);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 33b0de5..e52e4d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -515,3 +515,129 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
i * MMHUB_INSTANCE_REGISTER_OFFSET;
}
 }
+
+static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device 
*adev,
+   bool enable)
+{
+   uint32_t def, data, def1, data1;
+   int i, j;
+   int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2;
+
+   for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+   def = data = RREG32_SOC15_OFFSET(MMHUB, 0,
+   mmATCL2_0_ATC_L2_MISC_CG,
+   i * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+   if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+   data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+   else
+   data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+
+   if (def != data)
+   WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
+   i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
+
+   for (j = 0; j < 5; j++) {
+   def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0,
+   mmDAGB0_CNTL_MISC2,
+   i * MMHUB_INSTANCE_REGISTER_OFFSET +
+   j * dist);
+   if (enable &&
+   (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+   data1 &=
+   ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+   } else {
+   data1 |=
+   (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+   DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+   }
+
+   if (def1 != data1)
+   WREG32_SOC15_OFFSET(MMHUB, 0,
+   mmDAGB0_CNTL_MISC2,
+   i * MMHUB_INSTANCE_REGISTER_OFFSET +
+   j * dist, data1);
+
+   if (i == 1 && j == 3)
+   break;
+   }
+   }
+}
+
+static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device 
*adev,
+  bool enable)
+{
+   uint32_t def, data;
+   int i;
+
+   for (i = 0; 

[PATCH 1/1] drm/amdgpu: split athub clock gating from mmhub

2019-08-08 Thread Le Ma
Decouple getting/setting the athub CG state from mmhub, both as a cosmetic
cleanup and for ASICs that do not use mmhub 1.0. Also fix the wrong athub CG
state reported in amdgpu_pm_info.

Change-Id: I4ba970cae558ad5163e93fa9bc77f589196a22b1
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |   1 +
 drivers/gpu/drm/amd/amdgpu/athub_v1_0.c | 103 
 drivers/gpu/drm/amd/amdgpu/athub_v1_0.h |  30 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |   9 ++-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c |  55 -
 5 files changed, 154 insertions(+), 44 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/athub_v1_0.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 8afa0bc..464bfe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -154,6 +154,7 @@ amdgpu-y += \
 
 # add ATHUB block
 amdgpu-y += \
+   athub_v1_0.o \
athub_v2_0.o
 
 # add amdkfd interfaces
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
new file mode 100644
index 000..d9cc746
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "athub_v1_0.h"
+
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+  bool enable)
+{
+   uint32_t def, data;
+
+   def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+   if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+   data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+   else
+   data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+   if (def != data)
+   WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+   uint32_t def, data;
+
+   def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+   if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+   (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+   data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+   else
+   data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+   if(def != data)
+   WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+  enum amd_clockgating_state state)
+{
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   case CHIP_VEGA12:
+   case CHIP_VEGA20:
+   case CHIP_RAVEN:
+   athub_update_medium_grain_clock_gating(adev,
+   state == AMD_CG_STATE_GATE ? true : false);
+   athub_update_medium_grain_light_sleep(adev,
+   state == AMD_CG_STATE_GATE ? true : false);
+   break;
+   default:
+   break;
+   }
+
+   return 0;
+}
+
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+   int data;
+
+   if (amdgpu_sriov_vf(adev))
+   *flags = 0;
+
+   /* AMD_CG_SUPPORT_ATHUB_MGCG */
+   data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+   if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+   *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+   /* AMD_CG_SUPPORT_ATHUB_LS */
+

[PATCH 4/9] drm/amdgpu: enable hdp clock gating for Arcturus

2019-08-08 Thread Le Ma
Init the hdp MGCG/LS flags in the same way as for Vega20.

Change-Id: Ia33ca064f79ac409c53d3beb6f01b6e814a92041
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 4fbaca3..6038dce 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1017,7 +1017,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
-   AMD_CG_SUPPORT_GFX_CP_LS;
+   AMD_CG_SUPPORT_GFX_CP_LS |
+   AMD_CG_SUPPORT_HDP_MGCG |
+   AMD_CG_SUPPORT_HDP_LS;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x32;
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/9] drm/amdgpu: add hdp clock gating for Arcturus

2019-08-08 Thread Le Ma
Add hdp CGLS for Arcturus in set common clockgating function

Change-Id: I44e392fa5f7653908b36b0902e721d56eed3eb92
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 00758be..4fbaca3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1157,7 +1157,8 @@ static void soc15_update_hdp_light_sleep(struct 
amdgpu_device *adev, bool enable
 {
uint32_t def, data;
 
-   if (adev->asic_type == CHIP_VEGA20) {
+   if (adev->asic_type == CHIP_VEGA20 ||
+   adev->asic_type == CHIP_ARCTURUS) {
def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, 
mmHDP_MEM_POWER_CTRL));
 
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -1289,6 +1290,10 @@ static int soc15_common_set_clockgating_state(void 
*handle,
soc15_update_rom_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
break;
+   case CHIP_ARCTURUS:
+   soc15_update_hdp_light_sleep(adev,
+   state == AMD_CG_STATE_GATE ? true : false);
+   break;
default:
break;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/9] drm/amdgpu: enable gfx clock gating for Arcturus

2019-08-08 Thread Le Ma
Init gfx MGCG/LS, CGCG/LS, CP_LS flag.

Change-Id: I88db76d1b8f2b2cecce10846a4d22eec638eea8a
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 15f6356..00758be 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1013,7 +1013,11 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_ARCTURUS:
adev->asic_funcs = &vega20_asic_funcs;
-   adev->cg_flags = 0;
+   adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+   AMD_CG_SUPPORT_GFX_MGLS |
+   AMD_CG_SUPPORT_GFX_CGCG |
+   AMD_CG_SUPPORT_GFX_CGLS |
+   AMD_CG_SUPPORT_GFX_CP_LS;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x32;
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 6/9] drm/amdgpu: add sdma clock gating for Arcturus

2019-08-08 Thread Le Ma
Add ARCTURUS case in sdma set clockgating function

Change-Id: I65a3d99a140a8a76949b4d03c20bc6e0195c9854
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9e3c63c..185dff0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2157,6 +2157,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+   case CHIP_ARCTURUS:
sdma_v4_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
sdma_v4_0_update_medium_grain_light_sleep(adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 7/9] drm/amdgpu: enable sdma clock gating for Arcturus

2019-08-08 Thread Le Ma
Init sdma MGCG/LS flag

Change-Id: I600b8c67b1dfa74240269f2f028960b2c93a0ec2
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 6038dce..ad64975 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1019,7 +1019,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_HDP_MGCG |
-   AMD_CG_SUPPORT_HDP_LS;
+   AMD_CG_SUPPORT_HDP_LS |
+   AMD_CG_SUPPORT_SDMA_MGCG |
+   AMD_CG_SUPPORT_SDMA_LS;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x32;
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/9] drm/amdgpu: add gfx clock gating for Arcturus

2019-08-08 Thread Le Ma
Add the ARCTURUS case to the gfx set clockgating function. There is no 3D clock
on Arcturus, so the 3D clock gating update returns early for it.

Change-Id: I9893a2afea7f0b5d433baa14f48ae55a36516fac
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fdd90c1..de3de1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4218,6 +4218,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct 
amdgpu_device *adev,
 {
uint32_t data, def;
 
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return;
+
amdgpu_gfx_rlc_enter_safe_mode(adev);
 
/* Enable 3D CGCG/CGLS */
@@ -4410,6 +4413,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+   case CHIP_ARCTURUS:
gfx_v9_0_update_gfx_clock_gating(adev,
 state == AMD_CG_STATE_GATE ? 
true : false);
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 5/9] drm/amdgpu: support sdma clock gating for more instances

2019-08-08 Thread Le Ma
Shorten the code with the RREG32_SDMA/WREG32_SDMA macros in the CG part.

Change-Id: Icbf94169bb703877b105a307f14c708609faaae4
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 105 +++--
 1 file changed, 34 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7acf947..9e3c63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2084,61 +2084,35 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
bool enable)
 {
uint32_t data, def;
+   int i;
 
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
-   /* enable sdma0 clock gating */
-   def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, 
mmSDMA0_CLK_CTRL));
-   data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-   if (def != data)
-   WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 
data);
-
-   if (adev->sdma.num_instances > 1) {
-   def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, 
mmSDMA1_CLK_CTRL));
-   data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+   def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+   data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
if (def != data)
-   WREG32(SOC15_REG_OFFSET(SDMA1, 0, 
mmSDMA1_CLK_CTRL), data);
+   WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
}
} else {
-   /* disable sdma0 clock gating */
-   def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, 
mmSDMA0_CLK_CTRL));
-   data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
-SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-
-   if (def != data)
-   WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 
data);
-
-   if (adev->sdma.num_instances > 1) {
-   def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, 
mmSDMA1_CLK_CTRL));
-   data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
-SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+   def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+   data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+

[PATCH libdrm 1/1] tests/amdgpu: add the missing deactivation case for dispatch test

2019-08-04 Thread Le Ma
Change-Id: I502cc5fde7f00e41d496bfba0963d4db20459e00
Signed-off-by: Le Ma 
---
 tests/amdgpu/amdgpu_test.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index a4ce8ce..dc54155 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -472,9 +472,12 @@ static void amdgpu_disable_suites()
fprintf(stderr, "test deactivation failed - %s\n", 
CU_get_error_msg());
 
/* This test was ran on GFX9 only */
-   if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
-   if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test", 
CU_FALSE))
+   if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) {
+   if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test 
(GFX)", CU_FALSE))
+   fprintf(stderr, "test deactivation failed - %s\n", 
CU_get_error_msg());
+   if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test 
(Compute)", CU_FALSE))
fprintf(stderr, "test deactivation failed - %s\n", 
CU_get_error_msg());
+   }
 
/* This test was ran on GFX9 only */
if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH libdrm 1/1 v2] tests/amdgpu: divide dispatch test into compute and gfx

2019-07-22 Thread Le Ma
for better clarity

v2: accordingly change dispatch_test caller in gpu_reset test

Change-Id: I245d760d5f9d64eb10b137d5ce375ef52a4d873a
Signed-off-by: Le Ma 
Reviewed-by: Flora Cui 
---
 tests/amdgpu/basic_tests.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index 938106e..ab2a672 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -55,7 +55,8 @@ static void amdgpu_userptr_test(void);
 static void amdgpu_semaphore_test(void);
 static void amdgpu_sync_dependency_test(void);
 static void amdgpu_bo_eviction_test(void);
-static void amdgpu_dispatch_test(void);
+static void amdgpu_compute_dispatch_test(void);
+static void amdgpu_gfx_dispatch_test(void);
 static void amdgpu_draw_test(void);
 static void amdgpu_gpu_reset_test(void);
 
@@ -79,7 +80,8 @@ CU_TestInfo basic_tests[] = {
{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
{ "SW semaphore Test",  amdgpu_semaphore_test },
{ "Sync dependency Test",  amdgpu_sync_dependency_test },
-   { "Dispatch Test",  amdgpu_dispatch_test },
+   { "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
+   { "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
{ "Draw Test",  amdgpu_draw_test },
{ "GPU reset Test", amdgpu_gpu_reset_test },
CU_TEST_INFO_NULL,
@@ -2448,7 +2450,8 @@ static void 
amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
r = amdgpu_cs_ctx_free(context_handle);
CU_ASSERT_EQUAL(r, 0);
 }
-static void amdgpu_dispatch_test(void)
+
+static void amdgpu_compute_dispatch_test(void)
 {
int r;
struct drm_amdgpu_info_hw_ip info;
@@ -2463,6 +2466,13 @@ static void amdgpu_dispatch_test(void)
amdgpu_memset_dispatch_test(device_handle, 
AMDGPU_HW_IP_COMPUTE, ring_id);
amdgpu_memcpy_dispatch_test(device_handle, 
AMDGPU_HW_IP_COMPUTE, ring_id);
}
+}
+
+static void amdgpu_gfx_dispatch_test(void)
+{
+   int r;
+   struct drm_amdgpu_info_hw_ip info;
+   uint32_t ring_id;
 
r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
CU_ASSERT_EQUAL(r, 0);
@@ -3170,5 +3180,6 @@ static void amdgpu_gpu_reset_test(void)
r = amdgpu_cs_ctx_free(context_handle);
CU_ASSERT_EQUAL(r, 0);
 
-   amdgpu_dispatch_test();
+   amdgpu_compute_dispatch_test();
+   amdgpu_gfx_dispatch_test();
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH libdrm 1/1] tests/amdgpu: divide dispatch test into compute and gfx

2019-07-22 Thread Le Ma
for better clarity

Change-Id: I245d760d5f9d64eb10b137d5ce375ef52a4d873a
Signed-off-by: Le Ma 
---
 tests/amdgpu/basic_tests.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index 938106e..fa0f568 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -55,7 +55,8 @@ static void amdgpu_userptr_test(void);
 static void amdgpu_semaphore_test(void);
 static void amdgpu_sync_dependency_test(void);
 static void amdgpu_bo_eviction_test(void);
-static void amdgpu_dispatch_test(void);
+static void amdgpu_compute_dispatch_test(void);
+static void amdgpu_gfx_dispatch_test(void);
 static void amdgpu_draw_test(void);
 static void amdgpu_gpu_reset_test(void);
 
@@ -79,7 +80,8 @@ CU_TestInfo basic_tests[] = {
{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
{ "SW semaphore Test",  amdgpu_semaphore_test },
{ "Sync dependency Test",  amdgpu_sync_dependency_test },
-   { "Dispatch Test",  amdgpu_dispatch_test },
+   { "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
+   { "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
{ "Draw Test",  amdgpu_draw_test },
{ "GPU reset Test", amdgpu_gpu_reset_test },
CU_TEST_INFO_NULL,
@@ -2448,7 +2450,8 @@ static void 
amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
r = amdgpu_cs_ctx_free(context_handle);
CU_ASSERT_EQUAL(r, 0);
 }
-static void amdgpu_dispatch_test(void)
+
+static void amdgpu_compute_dispatch_test(void)
 {
int r;
struct drm_amdgpu_info_hw_ip info;
@@ -2463,6 +2466,13 @@ static void amdgpu_dispatch_test(void)
amdgpu_memset_dispatch_test(device_handle, 
AMDGPU_HW_IP_COMPUTE, ring_id);
amdgpu_memcpy_dispatch_test(device_handle, 
AMDGPU_HW_IP_COMPUTE, ring_id);
}
+}
+
+static void amdgpu_gfx_dispatch_test(void)
+{
+   int r;
+   struct drm_amdgpu_info_hw_ip info;
+   uint32_t ring_id;
 
r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
CU_ASSERT_EQUAL(r, 0);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/1] drm/amdgpu: remove unnecessary rlc reset function on gfx9

2019-04-01 Thread Le Ma
From: Le Ma 

The rlc reset function is not necessary during the gfx9 initialization/resume
phase, and it can even cause rlc fw loading to fail on some gfx9 ASICs.
Remove it; the change has been verified on Vega/Raven platforms.

Change-Id: I38bf0bbaf7183b7e6a53a1b63dba770de8e47d9e
Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3765d97..3c936b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2455,8 +2455,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
 
-   adev->gfx.rlc.funcs->reset(adev);
-
gfx_v9_0_init_pg(adev);
 
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx