[PATCH 1/1] drm/amdgpu: drop the UCODE_MAX_PSP_PACKAGING checking
This checking is not suitable any more after introducing psp aux firmware binary. Signed-off-by: Le Ma Suggested-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 11 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 -- 2 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7452b2dd775b..7b8469da3e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3484,12 +3484,6 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); - if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { - dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); - err = -EINVAL; - goto out; - } - if (sos_hdr_v2_0->header.header_version_minor == 1) { sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; @@ -3653,11 +3647,6 @@ static int parse_ta_v2_microcode(struct psp_context *psp) if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) return -EINVAL; - if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { - dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); - return -EINVAL; - } - for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) { err = parse_ta_bin_descriptor(psp, &ta_hdr->ta_fw_bin[ta_index], diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4e23419b92d4..a06cc0a155fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -456,8 +456,6 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2) - /* * fw loading support */ -- 2.43.2
[PATCH 2/2] drm/amdgpu: load sos binary properly on the basis of pmfw version
To be compatible with legacy IFWI, driver needs to carry legacy tOS and query pmfw version to load them accordingly. Add psp_firmware_header_v2_1 to handle the combined sos binary. Double the sos count limit for the case of aux sos fw packed. v2: pass the correct fw_bin_desc to parse_sos_bin_descriptor Signed-off-by: Le Ma Reviewed-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 29 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 11 - 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 189574d53ebd..f702f3391c2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; - int err = 0; + const struct psp_firmware_header_v2_1 *sos_hdr_v2_1; + int fw_index, fw_bin_count, start_index = 0; + const struct psp_fw_bin_desc *fw_bin; uint8_t *ucode_array_start_addr; - int fw_index = 0; + int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) @@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) case 2: sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; - if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); + + if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); err = -EINVAL; goto out; } - for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { - err = parse_sos_bin_descriptor(psp, - &sos_hdr_v2_0->psp_fw_bin[fw_index], + if (sos_hdr_v2_0->header.header_version_minor == 1) { + sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; + + fw_bin = sos_hdr_v2_1->psp_fw_bin; + + if (psp_is_aux_sos_load_required(psp)) + start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + else + fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + + } else { + fw_bin = sos_hdr_v2_0->psp_fw_bin; + } + + for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) { + err = parse_sos_bin_descriptor(psp, fw_bin + fw_index, sos_hdr_v2_0); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 5bc37acd3981..36b14c1b94b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 { struct psp_fw_bin_desc psp_fw_bin[]; }; +/* version_major=2, version_minor=1 */ +struct psp_firmware_header_v2_1 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + uint32_t psp_aux_fw_bin_index; + struct psp_fw_bin_desc psp_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +434,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct psp_firmware_header_v2_0 psp_v2_0; + struct psp_firmware_header_v2_0 psp_v2_1; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -447,7 +456,7 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2 /* * fw loading support -- 2.43.2
[PATCH 1/2] drm/amdgpu: add psp funcs callback to check if aux fw is needed
Query pmfw version to determine if aux sos fw needs to be loaded in psp v13.0. v2: refine callback to check if aux_fw loading is needed instead of getting pmfw version barely v3: return the comparison directly Signed-off-by: Le Ma Reviewed-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 4 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 17 + 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 74a96516c913..e8abbbcb4326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,6 +138,7 @@ struct psp_funcs { int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); + bool (*is_aux_sos_load_required)(struct psp_context *psp); }; struct ta_funcs { @@ -464,6 +465,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->fatal_error_recovery_quirk ? \ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) +#define psp_is_aux_sos_load_required(psp) \ + ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 1251ee38a676..51e470e8d67d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 300 +#define regMP1_PUB_SCRATCH00x3b10090 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) } } +static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + u32 pmfw_ver; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return false; + + /* load 4e version of sos if pmfw version less than 85.115.0 */ + pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4); + + return (pmfw_ver < 0x557300); +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, + .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) -- 2.43.2
[PATCH] drm/amdgpu: init microcode chip name from ip versions
To adapt to different gc versions in gfx_v9_4_3.c file. Change-Id: Ib4465aade0dcbbcc43318c6dc865f813c5411097 Signed-off-by: Le Ma Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 835004187a58..ec4d3fa87e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -431,16 +431,16 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; + char ucode_prefix[30]; int r; - chip_name = "gc_9_4_3"; + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); if (r) return r; - r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); if (r) return r; -- 2.43.2
[PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442
To fix the entity rq NULL issue. This setting has been moved to upper level. Fixes b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level") Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +--- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index eaa4f5f49949..589a734982a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; - int i, unset = 0; + int i; for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } - rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); @@ -490,17 +485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev, struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].page; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } - rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; -- 2.43.2
[PATCH v2] drm/amdgpu: move the drm client creation behind drm device registration
This patch is to eliminate interrupt warning below: "[drm] Fence fallback timer expired on ring sdma0.0". An early vm pt clearing job is sent to SDMA ahead of interrupt enabled, introduced by patch below: - drm/amdkfd: Export DMABufs from KFD using GEM handles And re-locating the drm client creation following after drm_dev_register looks like a more proper flow. v2: wrap the drm client creation Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 4 +++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 77e263660288..41db030ddc4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) static const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 584a0cea5572..da175c384adf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 475bd59c9ac2..91d5d9435067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors -- 2.38.1
[PATCH 1/1] drm/amdgpu: move the drm client creation behind drm device registration
This patch is to eliminate interrupt warning below: "[drm] Fence fallback timer expired on ring sdma0.0". An early vm pt clearing job is sent to SDMA ahead of interrupt enabled, introduced by patch below: - drm/amdkfd: Export DMABufs from KFD using GEM handles And re-locating the drm client creation following after drm_dev_register looks like a more proper flow. Change-Id: I0fece177b78345187068f92a823d96b3b7581140 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 11 +++ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index add315644773..69eb0f5574d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -139,14 +139,13 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } -static const struct drm_client_funcs kfd_client_funcs = { +const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -165,12 +164,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -209,10 +202,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 00eed8c10cd4..b2c6f2b3c0fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -40,6 +40,8 @@ extern uint64_t amdgpu_amdkfd_total_mem_size; +extern const struct drm_client_funcs kfd_client_funcs; + enum TLB_FLUSH_TYPE { TLB_FLUSH_LEGACY = 0, TLB_FLUSH_LIGHTWEIGHT, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0d0aa4b798ac..d0b98343481d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2293,6 +2293,17 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, drm_fbdev_generic_setup(adev_to_drm(adev), 32); } + if (adev->kfd.init_complete) { + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + goto err_pci; + } + drm_client_register(&adev->kfd.client); + } + ret = amdgpu_debugfs_init(adev); if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); -- 2.38.1
[PATCH 1/1] drm/amdgpu: enable interrupt prior to kfd device_init
This patch is to eliminate interrupt warning below: "[drm] Fence fallback timer expired on ring sdma0.0". An early vm pt clearing job is sent to SDMA ahead of interrupt enabled, introduced by patch below: - drm/amdkfd: Export DMABufs from KFD using GEM handles Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 56d9dfa61290..c8aa07282366 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2833,12 +2833,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - /* Don't init kfd if whole hive need to be reset during init */ - if (!adev->gmc.xgmi.pending_reset) { - kgd2kfd_init_zone_device(adev); - amdgpu_amdkfd_device_init(adev); - } - amdgpu_fru_get_product_info(adev); init_failed: @@ -4204,6 +4198,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_fence_driver_hw_init(adev); + /* Don't init kfd if whole hive need to be reset during init */ + if (!adev->gmc.xgmi.pending_reset) { + kgd2kfd_init_zone_device(adev); + amdgpu_amdkfd_device_init(adev); + } + dev_info(adev->dev, "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, -- 2.38.1
[PATCH 2/3 v2] drm/amdgpu: add debug flag to place fw bo on vram for frontdoor loading
Use debug_mask=0x8 param to help isolating data path issues on new systems in early phase. v2: rename the flag for explicitness (lijo) Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 616b6c911767..3d8a48f46b01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1144,6 +1144,7 @@ struct amdgpu_device { booldebug_vm; booldebug_largebar; booldebug_disable_soft_recovery; + booldebug_use_vram_fw_buf; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 880137774b4e..0776b0c5e4e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), + AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: soft reset for GPU recovery disabled\n"); adev->debug_disable_soft_recovery = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) { + pr_info("debug: place fw in vram for frontdoor loading\n"); + adev->debug_use_vram_fw_buf = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 51bfe3757c89..215994409ac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -467,7 +467,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0efb2568cb65..3e12763e477a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- 2.38.1
[PATCH 3/3] drm/amdgpu: move debug options init prior to amdgpu device init
To bring debug options into effect in early initialization phase Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3e0e39a1b5ba..b67ffc3a9a3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2234,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + amdgpu_init_debug_options(adev); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; @@ -2314,8 +2316,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, amdgpu_get_secondary_funcs(adev); } - amdgpu_init_debug_options(adev); - return 0; err_pci: -- 2.38.1
[PATCH 2/3] drm/amdgpu: add debug flag to change fw bo placement for frontdoor loading
se debug_mask=0x8 param to help isolating data path issues on new systems in early phase. Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 616b6c911767..c740825cf5dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1144,6 +1144,7 @@ struct amdgpu_device { booldebug_vm; booldebug_largebar; booldebug_disable_soft_recovery; + booldebug_change_fw_placement; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 880137774b4e..3e0e39a1b5ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), + AMDGPU_DEBUG_CHANGE_FW_PLACEMENT = BIT(3), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: soft reset for GPU recovery disabled\n"); adev->debug_disable_soft_recovery = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_CHANGE_FW_PLACEMENT) { + pr_info("debug: place fw in vram for frontdoor loading\n"); + adev->debug_change_fw_placement = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 51bfe3757c89..a9f2d33fa54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -467,7 +467,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || adev->debug_change_fw_placement) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0efb2568cb65..8d36a7c1c789 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || adev->debug_change_fw_placement) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- 2.38.1
[PATCH 1/3] Revert "drm/amdgpu: add param to specify fw bo location for front-door loading"
This reverts commit 1797d97a977280ac2cec95b1b59acc5856fea11e. Will use debug module param instead of independent module param. Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +-- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9da14436a373..616b6c911767 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -254,8 +254,6 @@ extern int amdgpu_agp; extern int amdgpu_wbrf; -extern int fw_bo_location; - #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD(256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 852cec98ff26..880137774b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -210,7 +210,6 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; -int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -990,10 +989,6 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); -MODULE_PARM_DESC(fw_bo_location, - "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); -module_param(fw_bo_location, int, 0644); - /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0993ba4a4476..51bfe3757c89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -467,7 +467,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index d334e42fe0eb..0efb2568cb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,8 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- 2.38.1
[PATCH 1/1] drm/amdgpu: add param to specify fw bo location for front-door loading
This param can help isolating data path issues on new systems in early phase. Change-Id: I0a972dd74fe2aad6b56628cea32ad72dcd17e283 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 616b6c911767..9da14436a373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -254,6 +254,8 @@ extern int amdgpu_agp; extern int amdgpu_wbrf; +extern int fw_bo_location; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD(256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 880137774b4e..852cec98ff26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -210,6 +210,7 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; +int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -989,6 +990,10 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +MODULE_PARM_DESC(fw_bo_location, + "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); +module_param(fw_bo_location, int, 0644); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 1bf975b8d083..2addbdf88394 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b14127429f30..1f67914568f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- 2.38.1
[PATCH 1/1] drm/amdgpu: finalizing mem_partitions at the end of GMC v9 sw_fini
The valid num_mem_partitions is required during ttm pool fini, thus move the cleanup at the end of the function. Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index bde25eb4ed8e..c1f2f166f064 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); - adev->gmc.num_mem_partitions = 0; - kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); @@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + return 0; } -- 2.38.1
[PATCH 1/1] drm/amd/pm: raise the deep sleep clock threshold for smu 13.0.6
The DS clock may exceed the limit as sclk dfll divider is 16 to target freq. Signed-off-by: Le Ma Reviewed-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 20f66e696f87..83e1228e6eee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -94,7 +94,7 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 -#define SMU_13_0_6_DSCLK_THRESHOLD 100 +#define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -- 2.38.1
[PATCH 1/1] drm/amd/pm: fix the print_clk_levels issue for SMU v13.0.6
Pass the correct size to smu_v13_0_6_print_clks, otherwise the same place in buf will be re-written. Change-Id: Ia0e12430d01146a11490204c1bab4b4f06cd17ea Signed-off-by: Le Ma --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 24 +-- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 11a6cd96c601..19c117eb5ebe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -821,12 +821,12 @@ static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu, return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value); } -static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, +static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size, struct smu_13_0_dpm_table *single_dpm_table, uint32_t curr_clk, const char *clk_name) { struct pp_clock_levels_with_latency clocks; - int i, ret, size = 0, level = -1; + int i, ret, level = -1; uint32_t clk1, clk2; ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); @@ -947,8 +947,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, single_dpm_table = &(dpm_context->dpm_tables.uclk_table); - return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, - "mclk"); + return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, + now, "mclk"); case SMU_SOCCLK: ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK, @@ -961,8 +961,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, single_dpm_table = &(dpm_context->dpm_tables.soc_table); - return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, - "socclk"); + return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, + now, "socclk"); case SMU_FCLK: ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK, @@ -975,8 +975,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, single_dpm_table = &(dpm_context->dpm_tables.fclk_table); - return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, - "fclk"); + return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, + now, "fclk"); case SMU_VCLK: ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK, @@ -989,8 +989,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, single_dpm_table = &(dpm_context->dpm_tables.vclk_table); - return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, - "vclk"); + return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, + now, "vclk"); case SMU_DCLK: ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK, @@ -1003,8 +1003,8 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, single_dpm_table = &(dpm_context->dpm_tables.dclk_table); - return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, - "dclk"); + return smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, + now, "dclk"); default: break; -- 2.38.1
[PATCH 7/7] drm/amd/pm: deprecate allow_xgmi_power_down interface
Replace with set_plpd_mode uniformly for places to use. Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 14 -- drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 -- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 - drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 8 5 files changed, 2 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 061534e845a7..1fb3f1ecfa7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1072,7 +1072,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) + if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) dev_warn(adev->dev, "Failed to disallow XGMI power down"); ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); @@ -1080,7 +1080,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_ras_intr_triggered()) return ret; - if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) + if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 9a157fe4cbc7..1b17a71ed45e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -351,20 +351,6 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev, return ret; } -int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en) -{ - struct smu_context *smu = adev->powerplay.pp_handle; - int ret = 0; - - if (is_support_sw_smu(adev)) { - mutex_lock(&adev->pm.mutex); - ret = smu_allow_xgmi_power_down(smu, en); - mutex_unlock(&adev->pm.mutex); - } - - return ret; -} - int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev, char **mode_desc) { struct smu_context *smu = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 564494f29717..feccd2a7120d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -424,8 +424,6 @@ int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev, uint32_t cstate); -int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en); - int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev, char **mode); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1c6b22638bf4..33eaf0d77163 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2178,23 +2178,6 @@ static int smu_set_df_cstate(void *handle, return ret; } -int smu_allow_xgmi_power_down(struct smu_context *smu, bool en) -{ - int ret = 0; - - if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) - return -EOPNOTSUPP; - - if (!smu->ppt_funcs || !smu->ppt_funcs->allow_xgmi_power_down) - return 0; - - ret = smu->ppt_funcs->allow_xgmi_power_down(smu, en); - if (ret) - dev_err(smu->adev->dev, "[AllowXgmiPowerDown] failed!\n"); - - return ret; -} - int smu_write_watermarks_table(struct smu_context *smu) { if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index e17169f681e8..4f6df3558b9b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -834,12 +834,6 @@ struct pptable_funcs { */ int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state); - /** -* @allow_xgmi_power_down: Enable/disable external global memory -* interconnect power down. -*/ - int (*allow_xgmi_power_down)(struct smu_context *smu, bool en); - /** * @select_xgmi_plpd_policy: Select xgmi per-link power down policy. */ @@ -1491,8 +1485,6 @@ int smu_set_gfx_power_up_by_imu(struct smu_context *smu); int smu_set_ac_dc(struct smu_context *smu); -int smu_allow_xgmi_power_down(struct smu_context *smu, bool
[PATCH 6/7] drm/amd/pm: integrate plpd allow/disallow into select_xgmi_plpd_policy in ppt level
The allow_xgmi_power_down(true/false) will be generally replaced by: - allow: select_xgmi_plpd_policy(XGMI_PLPD_DEFAULT) - disallow: select_xgmi_plpd_policy(XGMI_PLPD_DISALLOW) Signed-off-by: Le Ma --- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 21 + .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 23 --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 22 ++ 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 080140a0f673..6e2e665ad383 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2227,7 +2227,8 @@ static int arcturus_set_df_cstate(struct smu_context *smu, return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL); } -static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en) +static int arcturus_select_xgmi_plpd_policy(struct smu_context *smu, + enum pp_xgmi_plpd_mode mode) { uint32_t smu_version; int ret; @@ -2244,16 +2245,16 @@ static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en) return -EINVAL; } - if (en) + if (mode == XGMI_PLPD_DEFAULT) return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GmiPwrDnControl, - 1, - NULL); - - return smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_GmiPwrDnControl, - 0, - NULL); + 1, NULL); + else if (mode == XGMI_PLPD_DISALLOW) + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GmiPwrDnControl, + 0, NULL); + else + return -EINVAL; } static const struct throttling_logging_label { @@ -2455,7 +2456,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .set_df_cstate = arcturus_set_df_cstate, - .allow_xgmi_power_down = arcturus_allow_xgmi_power_down, + .select_xgmi_plpd_policy = arcturus_select_xgmi_plpd_policy, .log_thermal_throttling_event = arcturus_log_thermal_throttling_event, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 23820204efd7..b57184a3e24f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1604,20 +1604,27 @@ static int aldebaran_set_df_cstate(struct smu_context *smu, return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL); } -static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en) +static int aldebaran_select_xgmi_plpd_policy(struct smu_context *smu, +enum pp_xgmi_plpd_mode mode) { struct amdgpu_device *adev = smu->adev; /* The message only works on master die and NACK will be sent back for other dies, only send it on master die */ - if (!adev->smuio.funcs->get_socket_id(adev) && - !adev->smuio.funcs->get_die_id(adev)) + if (adev->smuio.funcs->get_socket_id(adev) || + adev->smuio.funcs->get_die_id(adev)) + return 0; + + if (mode == XGMI_PLPD_DEFAULT) + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GmiPwrDnControl, + 0, NULL); + else if (mode == XGMI_PLPD_DISALLOW) return smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_GmiPwrDnControl, - en ? 0 : 1, - NULL); + SMU_MSG_GmiPwrDnControl, + 1, NULL); else - return 0; + return -EINVAL; } static const struct throttling_logging_label { @@ -2072,7 +2079,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .set_soft_freq_limited_range = aldebaran_set_soft_freq_limited_range, .od_edit_dpm_table = aldebaran_usr_edit_dpm_table, .set_
[PATCH 4/7] drm/amd/pm: add xgmi_plpd_policy sysfs node for user to change plpd policy
Add xgmi_plpd_policy sysfs node for users to check and select xgmi per-link power down policy: - arg 0: disallow plpd - arg 1: default policy - arg 2: optimized policy v2: split from smu v13.0.6 code and miscellaneous updates v3: add usage comments around set/get functions Signed-off-by: Le Ma Reviewed-by: Asad Kamal --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 43 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 68 + drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 5 ++ 3 files changed, 116 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 07853162..9a157fe4cbc7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -365,6 +365,49 @@ int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en) return ret; } +int amdgpu_dpm_get_xgmi_plpd_mode(struct amdgpu_device *adev, char **mode_desc) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int mode = XGMI_PLPD_NONE; + + if (is_support_sw_smu(adev)) { + mode = smu->plpd_mode; + if (mode_desc == NULL) + return mode; + switch (smu->plpd_mode) { + case XGMI_PLPD_DISALLOW: + *mode_desc = "disallow"; + break; + case XGMI_PLPD_DEFAULT: + *mode_desc = "default"; + break; + case XGMI_PLPD_OPTIMIZED: + *mode_desc = "optimized"; + break; + case XGMI_PLPD_NONE: + default: + *mode_desc = "none"; + break; + } + } + + return mode; +} + +int amdgpu_dpm_set_xgmi_plpd_mode(struct amdgpu_device *adev, int mode) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = -EOPNOTSUPP; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + ret = smu_set_xgmi_plpd_mode(smu, mode); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_enable_mgpu_fan_boost(struct amdgpu_device *adev) { void *pp_handle = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 2807f3bd9ebe..745f5b6a2826 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1991,6 +1991,70 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ return 0; } +/* Following items will be read out to indicate current plpd policy: + * - -1: none + * - 0: disallow + * - 1: default + * - 2: optimized + */ +static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + char *mode_desc = "none"; + int mode; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc); + + return sysfs_emit(buf, "%d: %s\n", mode, mode_desc); +} + +/* Following argument value is expected from user to change plpd policy + * - arg 0: disallow plpd + * - arg 1: default policy + * - arg 2: optimized policy + */ +static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int mode, ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtos32(buf, 0, &mode); + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = amdgpu_dpm_set_xgmi_plpd_mode(adev, mode); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(power_dpm_f
[PATCH 5/7] drm/amd/pm: init plpd_mode properly for different asics
Assign DEFAULT mode if it supports plpd, otherwise keeps NONE v2: reduce ip version checks Signed-off-by: Le Ma --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 893359b26418..1c6b22638bf4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1130,6 +1130,21 @@ static void smu_swctf_delayed_work_handler(struct work_struct *work) orderly_poweroff(true); } +static void smu_init_xgmi_plpd_mode(struct smu_context *smu) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 2)) { + smu->plpd_mode = XGMI_PLPD_DEFAULT; + return; + } + + /* PMFW put PLPD into default policy after enabling the feature */ + if (smu_feature_is_enabled(smu, + SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT)) + smu->plpd_mode = XGMI_PLPD_DEFAULT; + else + smu->plpd_mode = XGMI_PLPD_NONE; +} + static int smu_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1361,6 +1376,8 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } + smu_init_xgmi_plpd_mode(smu); + ret = smu_feature_get_enabled_mask(smu, &features_supported); if (ret) { dev_err(adev->dev, "Failed to retrieve supported dpm features!\n"); -- 2.38.1
[PATCH 3/7] drm/amd/pm: add xgmi plpd mode selecting interface for smu v13.0.6
Add the interface to change xgmi per-link power down policy. v2: split from sysfs interface code and miscellaneous updates v3: check against XGMI_PLPD_DEFAULT/XGMI_PLPD_OPTIMIZED and pass PPSMC param Signed-off-by: Le Ma Reviewed-by: Asad Kamal --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 24 ++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 9 ++ drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 31 +++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index e6f1620acdd4..893359b26418 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3157,6 +3157,30 @@ static int smu_get_prv_buffer_details(void *handle, void **addr, size_t *size) return 0; } +int smu_set_xgmi_plpd_mode(struct smu_context *smu, + enum pp_xgmi_plpd_mode mode) +{ + int ret = -EOPNOTSUPP; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return ret; + + /* PLPD policy is not supported if it's NONE */ + if (smu->plpd_mode == XGMI_PLPD_NONE) + return ret; + + if (smu->plpd_mode == mode) + return 0; + + if (smu->ppt_funcs && smu->ppt_funcs->select_xgmi_plpd_policy) + ret = smu->ppt_funcs->select_xgmi_plpd_policy(smu, mode); + + if (!ret) + smu->plpd_mode = mode; + + return ret; +} + static const struct amd_pm_funcs swsmu_pm_funcs = { /* export for sysfs */ .set_fan_control_mode= smu_set_fan_control_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 5356b91c6292..e17169f681e8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -840,6 +840,12 @@ struct pptable_funcs { */ int (*allow_xgmi_power_down)(struct smu_context *smu, bool en); + /** +* @select_xgmi_plpd_policy: Select xgmi per-link power down policy. +*/ + int (*select_xgmi_plpd_policy)(struct smu_context *smu, + enum pp_xgmi_plpd_mode mode); + /** * @update_pcie_parameters: Update and upload the system's PCIe * capabilites to the SMU. @@ -1487,6 +1493,9 @@ int smu_set_ac_dc(struct smu_context *smu); int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); +int smu_set_xgmi_plpd_mode(struct smu_context *smu, + enum pp_xgmi_plpd_mode mode); + int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value); int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 7c300b4d95c7..4850e48bbef5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -252,7 +252,8 @@ __SMU_DUMMY_MAP(QueryValidMcaCount),\ __SMU_DUMMY_MAP(QueryValidMcaCeCount), \ __SMU_DUMMY_MAP(McaBankDumpDW), \ - __SMU_DUMMY_MAP(McaBankCeDumpDW), + __SMU_DUMMY_MAP(McaBankCeDumpDW), \ + __SMU_DUMMY_MAP(SelectPLPDMode), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 11a6cd96c601..b137c37903fc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -170,6 +170,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(QueryValidMcaCeCount, PPSMC_MSG_QueryValidMcaCeCount,0), MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, 0), MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, 0), + MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), }; static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { @@ -2716,6 +2717,35 @@ static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = { .mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array, }; +static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, + enum pp_xgmi_plpd_mode mode) +{ + struct amdgpu_device *adev = smu->adev; + int ret, param; + + switch (mode) { + case XGMI_PLPD_DEFAULT: + param = PPSMC_PLPD_MODE_DEFAULT; + break; + case XGMI_PLPD_OPTIMIZED: + pa
[PATCH 2/7] drm/amd/pm: add plpd_mode in smu_context to indicate current mode
Add enum pp_xgmi_plpd_mode to describe PLPD policies. v2: move the enum from amdgpu_smu.h to kgd_pp_interface.h Signed-off-by: Le Ma Reviewed-by: Asad Kamal --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 8 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5a889f733462..e0bb6d39f0c3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -265,6 +265,14 @@ enum pp_power_type PP_PWR_TYPE_FAST, }; +enum pp_xgmi_plpd_mode { + XGMI_PLPD_NONE = -1, + XGMI_PLPD_DISALLOW, + XGMI_PLPD_DEFAULT, + XGMI_PLPD_OPTIMIZED, + XGMI_PLPD_COUNT, +}; + #define PP_GROUP_MASK0xF000 #define PP_GROUP_SHIFT 28 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 5a52098bcf16..5356b91c6292 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -563,6 +563,8 @@ struct smu_context { u32 debug_resp_reg; struct delayed_work swctf_delayed_work; + + enum pp_xgmi_plpd_mode plpd_mode; }; struct i2c_adapter; -- 2.38.1
[PATCH 1/7] drm/amd/pm: update pmfw headers for version 85.73.0
To add message to select PLPD mode. Signed-off-by: Le Ma Reviewed-by: Asad Kamal --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 021dcbe58473..509e3cd483fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -90,6 +90,7 @@ #define PPSMC_MSG_ClearMcaOnRead0x39 #define PPSMC_MSG_QueryValidMcaCeCount 0x3A #define PPSMC_MSG_McaBankCeDumpDW 0x3B +#define PPSMC_MSG_SelectPLPDMode0x40 #define PPSMC_Message_Count 0x41 //PPSMC Reset Types for driver msg argument @@ -107,6 +108,10 @@ #define PPSMC_XCD_THM_TYPE 0x3 #define PPSMC_HBM_THM_TYPE 0x4 +//PLPD modes +#define PPSMC_PLPD_MODE_DEFAULT 0x1 +#define PPSMC_PLPD_MODE_OPTIMIZED 0x2 + typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_MSG; -- 2.38.1
[PATCH 2/2] drm/amdgpu: update gc_info v2_1 from discovery
Several new fields are exposed in gc_info v2_1 Change-Id: Ib320e8ca70de8960634e1a22aaf2611ba7ebbaeb Signed-off-by: Le Ma Reviewed-by: Shiwu Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/include/discovery.h | 30 +++ 3 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f4cd43ce251b..6ffdfc5a7c0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1390,6 +1390,7 @@ union gc_info { struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; struct gc_info_v2_0 v2; + struct gc_info_v2_1 v2_1; }; static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) @@ -1465,6 +1466,15 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / le32_to_cpu(gc_info->v2.gc_num_sh_per_se); adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + if (gc_info->v2.header.version_minor == 1) { + adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh); + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu); + adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */ + adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc); + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */ + } break; default: dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a4ff515ce896..395c1768b9fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -241,6 +241,9 @@ struct amdgpu_gfx_config { uint32_t gc_gl1c_per_sa; uint32_t gc_gl1c_size_per_instance; uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_num_cu_per_sqc; + uint32_t gc_tcc_size; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index b9884e576f98..7a9d473d0917 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -280,6 +280,36 @@ struct gc_info_v2_0 { uint32_t gc_num_packer_per_sc; }; +struct gc_info_v2_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; + /* new for v2_1 */ + uint32_t gc_num_tcp_per_sh; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_cu_per_sqc; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_tcc_size; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ -- 2.38.1
[PATCH 1/2] drm/amdgpu: update mall info v2 from discovery
Mall info v2 is introduced in ip discovery Change-Id: Ia2e49e7679c578065f85059a077fc08c9f84615c Signed-off-by: Le Ma Reviewed-by: Shiwu Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 + drivers/gpu/drm/amd/include/discovery.h | 8 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 9d8d08daca57..f4cd43ce251b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1478,6 +1478,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) union mall_info { struct mall_info_v1_0 v1; + struct mall_info_v2_0 v2; }; static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) @@ -1518,6 +1519,10 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) adev->gmc.mall_size = mall_size; adev->gmc.m_half_use = half_use; break; + case 2: + mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc); + adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc; + break; default: dev_err(adev->dev, "Unhandled MALL info table %d.%d\n", diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index f43e29722ef7..b9884e576f98 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -30,7 +30,7 @@ #define GC_TABLE_ID 0x4347 #define HARVEST_TABLE_SIGNATURE 0x56524148 #define VCN_INFO_TABLE_ID 0x004E4356 -#define MALL_INFO_TABLE_ID 0x4D414C4C +#define MALL_INFO_TABLE_ID 0x4C4C414D typedef enum { @@ -312,6 +312,12 @@ struct mall_info_v1_0 { uint32_t reserved[5]; }; +struct mall_info_v2_0 { + struct mall_info_header header; + uint32_t mall_size_per_umc; + uint32_t reserved[8]; +}; + #define VCN_INFO_TABLE_MAX_NUM_INSTANCES 4 struct vcn_info_header { -- 2.38.1
[PATCH 1/1] drm/amdgpu: remove duplicated doorbell range init for sdma v4.4.2
Handled in earlier phase Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 6be19ffc502b..f413898dda37 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -902,11 +902,6 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, WREG32_SDMA(i, regSDMA_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ temp = RREG32_SDMA(i, regSDMA_F32_CNTL); -- 2.38.1
[PATCH 1/1] drm/amdgpu/pm: notify driver unloading to PMFW for SMU v13.0.6 dGPU
Per requested, follow the same sequence as APU to send only PPSMC_MSG_PrepareForDriverUnload to PMFW during driver unloading. Change-Id: I2dc8495572b0bce6e21eafb51b215c83d94ac647 Signed-off-by: Le Ma Reviewed-by: Shiwu Zhang Reviewed-by: Lijo Lazar --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3da614faf75d..392ccebc8dac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1409,18 +1409,16 @@ static int smu_v13_0_6_system_features_control(struct smu_context *smu, bool enable) { struct amdgpu_device *adev = smu->adev; - int ret; - - /* On APUs, notify FW that the device is no longer driver managed */ - if (adev->flags & AMD_IS_APU) { - if (!enable) - smu_v13_0_6_notify_unload(smu); + int ret = 0; - return 0; + if (enable) { + if (!(adev->flags & AMD_IS_APU)) + ret = smu_v13_0_system_features_control(smu, enable); + } else { + /* Notify FW that the device is no longer driver managed */ + smu_v13_0_6_notify_unload(smu); } - ret = smu_v13_0_system_features_control(smu, enable); - return ret; } -- 2.38.1
[PATCH] drm/amdgpu: correct the memcpy size for ip discovery firmware
Use fw->size instead of discovery_tmr_size for fallback path. Change-Id: I61f1ec55314ea5948ed3ef821becfdd63d876272 Signed-off-by: Le Ma Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 309d35026222..0b4f4d2f8d32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -234,7 +234,7 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, ui return r; } - memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size); + memcpy((u8 *)binary, (u8 *)fw->data, fw->size); release_firmware(fw); return 0; -- 2.17.1
[PATCH] drm/amdgpu: correct register access for RLC_JUMP_TABLE_RESTORE
From: Le Ma should count on GC IP base address Signed-off-by: Le Ma Signed-off-by: Hawking Zhang Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b305fd39874f..edb3e3b08eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3070,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, +adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } -- 2.17.1
[PATCH 1/1] drm/amdgpu: fix ctx init failure for asics without gfx ring
This workaround does not affect other asics because amdgpu only need expose one gfx sched to user for now. Change-Id: Ica92b8565a89899aebe0eba7b2b5a25159b411d3 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 63f6365..64e2bab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -127,7 +127,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, switch (i) { case AMDGPU_HW_IP_GFX: - scheds = adev->gfx.gfx_sched; + sched = &adev->gfx.gfx_ring[0].sched; + scheds = &sched; num_scheds = 1; break; case AMDGPU_HW_IP_COMPUTE: -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/10 v2] drm/amdgpu: add concurrent baco reset support for XGMI
Currently each XGMI node reset wq does not run in parrallel because same work item bound to same cpu runs in sequence. So change to bound the xgmi_reset_work item to different cpus. XGMI requires all nodes enter into baco within very close proximity before any node exit baco. So schedule the xgmi_reset_work wq twice for enter/exit baco respectively. To use baco for XGMI, PMFW supported for baco on XGMI needs to be involved. The case that PSP reset and baco reset coexist within an XGMI hive never exist and is not in the consideration. v2: define use_baco flag to simplify the code for xgmi baco sequence Change-Id: I9c08cf90134f940b42e20d2129ff87fba761c532 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 82 +- 2 files changed, 72 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d120fe5..08929e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -998,6 +998,8 @@ struct amdgpu_device { int pstate; /* enable runtime pm on the device */ boolrunpm; + + boolin_baco; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bd387bb..5367134 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2654,7 +2654,13 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) struct amdgpu_device *adev = container_of(__work, struct amdgpu_device, xgmi_reset_work); - adev->asic_reset_res = amdgpu_asic_reset(adev); + if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + adev->asic_reset_res = (adev->in_baco == false) ? + amdgpu_device_baco_enter(adev->ddev) : + amdgpu_device_baco_exit(adev->ddev); + else + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); @@ -3789,13 +3795,18 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, +static int amdgpu_do_asic_reset(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, struct list_head *device_list_handle, bool *need_full_reset_arg) { struct amdgpu_device *tmp_adev = NULL; bool need_full_reset = *need_full_reset_arg, vram_lost = false; int r = 0; + int cpu = smp_processor_id(); + bool use_baco = + (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? + true : false; /* * ASIC reset has to be done on all HGMI hive nodes ASAP @@ -3803,21 +3814,24 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, */ if (need_full_reset) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - /* For XGMI run all resets in parallel to speed up the process */ + /* +* For XGMI run all resets in parallel to speed up the +* process by scheduling the highpri wq on different +* cpus. For XGMI with baco reset, all nodes must enter +* baco within close proximity before anyone exit. +*/ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + if (!queue_work_on(cpu, system_highpri_wq, + &tmp_adev->xgmi_reset_work)) r = -EALREADY; + cpu = cpumask_next(cpu, cpu_online_mask); } else r = amdgpu_asic_reset(tmp_adev); - - if (r) { - DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", -r, tmp_adev->ddev->unique); + if (r) break; - } } - /* For XGMI wait for all PSP resets to complete before proceed */ + /* For
[PATCH 06/10 v2] drm/amdgpu: add condition to enable baco for ras recovery
Switch to baco reset method for ras recovery if baco-supported PMFW ready. If not, keep the original reset method. Change-Id: I07c3e6862be03e068745c73db8ea71f428ecba6b Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 951327f..e8c5a00 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -557,7 +557,8 @@ static int soc15_mode2_reset(struct amdgpu_device *adev) static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + bool baco_reset = false; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (adev->asic_type) { case CHIP_RAVEN: @@ -571,18 +572,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) soc15_asic_get_baco_capability(adev, &baco_reset); - else - baco_reset = false; - if (baco_reset) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (hive || (ras && ras->supported)) - baco_reset = false; - } + /* +* 1. PMFW version > 0x284300: all cases use baco +* 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco +*/ + if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + baco_reset = false; break; default: - baco_reset = false; break; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/10 v3] drm/amdgpu: reduce redundant uvd context lost warning message
Move the print out of uvd instance loop in amdgpu_uvd_suspend v2: drop unnecessary brackets v3: grab ras_intr state once for multiple times use Change-Id: Ifad997debd84763e1b55d668e144b729598f115e Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e324bfe..d587ffe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -349,6 +349,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) unsigned size; void *ptr; int i, j; + bool in_ras_intr = amdgpu_ras_intr_triggered(); cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -376,13 +377,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) return -ENOMEM; /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (amdgpu_ras_intr_triggered()) { - DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + if (in_ras_intr) memset(adev->uvd.inst[j].saved_bo, 0, size); - } else { + else memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); - } } + + if (in_ras_intr) + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/10 v2] drm/amdgpu: reduce redundant uvd context lost warning message
Move the print out of uvd instance loop in amdgpu_uvd_suspend v2: drop unnecessary brackets Change-Id: Ifad997debd84763e1b55d668e144b729598f115e Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e324bfe..69248ecb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -376,13 +376,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) return -ENOMEM; /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (amdgpu_ras_intr_triggered()) { - DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + if (amdgpu_ras_intr_triggered()) memset(adev->uvd.inst[j].saved_bo, 0, size); - } else { + else memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); - } } + + if (amdgpu_ras_intr_triggered()) + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 08/10] drm/amdgpu: support full gpu reset workflow when ras err_event_athub occurs
This athub fatal error can be recovered by baco without system-level reboot, so add a mode to use baco for the recovery. Not affect the default psp reset situations for now. Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 71abfe9..53e9590 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4021,12 +4021,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_device *tmp_adev = NULL; int i, r = 0; bool in_ras_intr = amdgpu_ras_intr_triggered(); + bool use_baco = + (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? + true : false; /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { + if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); @@ -4037,7 +4040,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); + dev_info(adev->dev, "GPU %s begin!\n", + (in_ras_intr && !use_baco) ? "jobs stop":"reset"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4104,7 +4108,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_unregister_gpu_instance(tmp_adev); /* disable ras on ALL IPs */ - if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!(in_ras_intr && !use_baco) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -4115,13 +4120,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? &job->base : NULL); - if (in_ras_intr) + if (in_ras_intr && !use_baco) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr) + if (in_ras_intr && !use_baco) goto skip_sched_resume; /* @@ -4214,7 +4219,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) + if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/10] drm/amdgpu: add concurrent baco reset support for XGMI
Currently each XGMI node reset wq does not run in parrallel because same work item bound to same cpu runs in sequence. So change to bound the xgmi_reset_work item to different cpus. XGMI requires all nodes enter into baco within very close proximity before any node exit baco. So schedule the xgmi_reset_work wq twice for enter/exit baco respectively. The default reset code path and methods do not change for vega20 production: - baco reset without xgmi/ras - psp reset with xgmi/ras To enable baco for XGMI/RAS case, both 2 conditions below are needed: - amdgpu_ras_enable=2 - baco-supported smu firmware The case that PSP reset and baco reset coexist within an XGMI hive is not in the consideration. Change-Id: I9c08cf90134f940b42e20d2129ff87fba761c532 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 78 ++ 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d120fe5..08929e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -998,6 +998,8 @@ struct amdgpu_device { int pstate; /* enable runtime pm on the device */ boolrunpm; + + boolin_baco; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bd387bb..71abfe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2654,7 +2654,13 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) struct amdgpu_device *adev = container_of(__work, struct amdgpu_device, xgmi_reset_work); - adev->asic_reset_res = amdgpu_asic_reset(adev); + if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + adev->asic_reset_res = (adev->in_baco == false) ? + amdgpu_device_baco_enter(adev->ddev) : + amdgpu_device_baco_exit(adev->ddev); + else + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); @@ -3796,6 +3802,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset = *need_full_reset_arg, vram_lost = false; int r = 0; + int cpu = smp_processor_id(); /* * ASIC reset has to be done on all HGMI hive nodes ASAP @@ -3803,21 +3810,24 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, */ if (need_full_reset) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - /* For XGMI run all resets in parallel to speed up the process */ + /* +* For XGMI run all resets in parallel to speed up the +* process by scheduling the highpri wq on different +* cpus. For XGMI with baco reset, all nodes must enter +* baco within close proximity before anyone exit. +*/ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + if (!queue_work_on(cpu, system_highpri_wq, + &tmp_adev->xgmi_reset_work)) r = -EALREADY; + cpu = cpumask_next(cpu, cpu_online_mask); } else r = amdgpu_asic_reset(tmp_adev); - - if (r) { - DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", -r, tmp_adev->ddev->unique); + if (r) break; - } } - /* For XGMI wait for all PSP resets to complete before proceed */ + /* For XGMI wait for all work to complete before proceed */ if (!r) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { @@ -3826,11 +3836,59 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, r = tmp_adev->asic_reset_res;
[PATCH 02/10] drm/amdgpu: export amdgpu_ras_find_obj to use externally
Change it to external interface. Change-Id: I2ab61f149c84a05a6f883a4c7415ea8012ec03a6 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1593564..04394c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -198,9 +198,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, - struct ras_common_if *head); - /** * DOC: AMDGPU RAS debugfs control interface * @@ -445,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, } /* return an obj equal to head, or the first when head is NULL */ -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f80fd34..a2c1ac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -611,6 +611,9 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + extern atomic_t amdgpu_ras_in_intr; static inline bool amdgpu_ras_intr_triggered(void) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 09/10] drm/amdgpu: clear err_event_athub flag after reset exit
Otherwise next err_event_athub error cannot call gpu reset. And following resume sequence will not be affected by this flag. v2: create function to clear amdgpu_ras_in_intr for modularity of ras driver Change-Id: I5cd293f30f23876bf2a1860681bcb50f47713ecd Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h| 5 + 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 53e9590..8387b44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3890,6 +3890,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, } } + if (!r && amdgpu_ras_intr_triggered()) + amdgpu_ras_intr_cleared(); + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (need_full_reset) { /* post card */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index a2c1ac1..d4ade47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -621,6 +621,11 @@ static inline bool amdgpu_ras_intr_triggered(void) return !!atomic_read(&amdgpu_ras_in_intr); } +static inline void amdgpu_ras_intr_cleared(void) +{ + atomic_set(&amdgpu_ras_in_intr, 0); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); #endif -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 05/10] drm/amdgpu: enable/disable doorbell interrupt in baco entry/exit helper
This operation is needed when baco entry/exit for ras recovery Change-Id: I535c7231693f3138a8e3d5acd55672e2ac68232f Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b1408c5..bd387bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4308,10 +4308,14 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) int amdgpu_device_baco_enter(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (!amdgpu_device_supports_baco(adev->ddev)) return -ENOTSUPP; + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + if (is_support_sw_smu(adev)) { struct smu_context *smu = &adev->smu; int ret; @@ -4319,8 +4323,6 @@ int amdgpu_device_baco_enter(struct drm_device *dev) ret = smu_baco_enter(smu); if (ret) return ret; - - return 0; } else { void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; @@ -4331,14 +4333,15 @@ int amdgpu_device_baco_enter(struct drm_device *dev) /* enter BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 1)) return -EIO; - - return 0; } + + return 0; } int amdgpu_device_baco_exit(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (!amdgpu_device_supports_baco(adev->ddev)) return -ENOTSUPP; @@ -4351,7 +4354,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - return 0; } else { void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; @@ -4362,7 +4364,10 @@ int amdgpu_device_baco_exit(struct drm_device *dev) /* exit BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 0)) return -EIO; - - return 0; } + + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + + return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/10] drm/amdgpu: remove ras global recovery handling from ras_controller_int handler
From: Le Ma v2: add notification when ras controller interrupt generates Change-Id: Ic03e42e9d1c4dab1fa7f4817c191a16e485b48a9 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 0db458f..25231d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -324,7 +324,12 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device RAS_CNTLR_INTERRUPT_CLEAR, 1); WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - amdgpu_ras_global_ras_isr(adev); + DRM_WARN("RAS controller interrupt triggered by NBIF error\n"); + + /* ras_controller_int is dedicated for nbif ras error, +* not the global interrupt for sync flood +*/ + amdgpu_ras_reset_gpu(adev, true); } } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/10] drm/amdgpu: reduce redundant uvd context lost warning message
Move the print out of uvd instance loop in amdgpu_uvd_suspend Change-Id: Ifad997debd84763e1b55d668e144b729598f115e Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e324bfe..ac7c7795 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -377,12 +377,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) /* re-write 0 since err_event_athub will corrupt VCPU buffer */ if (amdgpu_ras_intr_triggered()) { - DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); memset(adev->uvd.inst[j].saved_bo, 0, size); } else { memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); } } + + if (amdgpu_ras_intr_triggered()) { + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 06/10] drm/amdgpu: add condition to enable baco for xgmi/ras case
Avoid to change default reset behavior for production card by checking amdgpu_ras_enable equal to 2. And only new enough smu ucode can support baco for xgmi/ras case. Change-Id: I07c3e6862be03e068745c73db8ea71f428ecba6b Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 951327f..6202333 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -577,7 +577,9 @@ soc15_asic_reset_method(struct amdgpu_device *adev) struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (hive || (ras && ras->supported)) + if ((hive || (ras && ras->supported)) && + (amdgpu_ras_enable != 2 || + adev->pm.fw_version <= 0x283400)) baco_reset = false; } break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 03/10] drm/amdgpu: clear ras controller status registers when interrupt occurs
To fix issue that ras controller interrupt cannot be triggered anymore after one time nbif uncorrectable error. And error count is stored in nbif ras object for query. Change-Id: Iba482c169fdff3e9c390072c0289a622a522133c Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 25231d6..9a3a65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -52,6 +52,9 @@ #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x0FFCL #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK0x001FL +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -314,6 +317,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev) static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) { uint32_t bif_doorbell_intr_cntl; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); if (REG_GET_FIELD(bif_doorbell_intr_cntl, @@ -324,6 +328,12 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device RAS_CNTLR_INTERRUPT_CLEAR, 1); WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + /* +* clear error status after ras_controller_intr according to +* hw team and count ue number for query +*/ + nbio_v7_4_query_ras_error_count(adev, &obj->err_data); + DRM_WARN("RAS controller interrupt triggered by NBIF error\n"); /* ras_controller_int is dedicated for nbif ras error, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: fix no ACK from LDS read during stress test for Arcturus
Set mmSQ_CONFIG.DISABLE_SMEM_SOFT_CLAUSE as W/R. Change-Id: I6225909fd62702427fbb807e0c6ba6bafcfa41d5 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5e7a01c..07962ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -699,6 +699,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fff, 0x2ebd9fe3), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x, 0x011A), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: add missing amdgpu_ras.h header include
Fix compilation error. Change-Id: I461c558778f9a52378269324dc41b8d639f3ccbe Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index fce206f..bbe9ac7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -34,6 +34,8 @@ #include "psp_v11_0.h" #include "psp_v12_0.h" +#include "amdgpu_ras.h" + static void psp_set_funcs(struct amdgpu_device *adev); static int psp_early_init(void *handle) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amdgpu: remove ras global recovery handling from ras_controller_int handler
From: Le Ma Change-Id: Ia8a61a4b3bd529f0f691e43e69b299d7d151c0c2 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 0db458f..876690a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -324,7 +324,11 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device RAS_CNTLR_INTERRUPT_CLEAR, 1); WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - amdgpu_ras_global_ras_isr(adev); + /* +* ras_controller_int is dedicated for nbif ras error, +* not the global interrupt for sync flood +*/ + amdgpu_ras_reset_gpu(adev, true); } } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/4] drm/amdgpu: clear UVD VCPU buffer when err_event_athub generated
The err_event_athub error will mess up the buffer and cause UVD resume hang. Change-Id: If17a2161fb9b1b52eac08de00d2e935191bdbf99 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b2c364b..b4dd89a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -39,6 +39,8 @@ #include "cikd.h" #include "uvd/uvd_4_2_d.h" +#include "amdgpu_ras.h" + /* 1 second timeout */ #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -372,7 +374,13 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + /* re-write 0 since err_event_athub will corrupt VCPU buffer */ + if (amdgpu_ras_intr_triggered()) { + DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + memset(adev->uvd.inst[j].saved_bo, 0, size); + } else { + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + } } return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/4] drm/amdgpu: bypass some cleanup work after err_event_athub
PSP lost connection when err_event_athub occurs. These cleanup work can be skipped in BACO reset. Change-Id: If54a3735edd6ccbb58d40a5f8833392981f8ce37 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 20 +++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 -- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 51d74bb..72d9892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2274,6 +2274,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) continue; + /* PSP lost connection when err_event_athub occurs */ + if (amdgpu_ras_intr_triggered() && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { + adev->ip_blocks[i].status.hw = false; + continue; + } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index fd7a73f..fce206f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -167,6 +167,13 @@ psp_cmd_submit_buf(struct psp_context *psp, while (*((unsigned int *)psp->fence_buf) != index) { if (--timeout == 0) break; + /* +* Shouldn't wait for timeout when err_event_athub occurs, +* because gpu reset thread triggered and lock resource should +* be released for psp resume sequence. +*/ + if (amdgpu_ras_intr_triggered()) + break; msleep(1); amdgpu_asic_invalidate_hdp(psp->adev, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 796326b..dab90c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -558,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) return 0; - ret = psp_ras_enable_features(&adev->psp, &info, enable); - if (ret) { - DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", - enable ? "enable":"disable", - ras_block_str(head->block), - ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - return -EAGAIN; - return -EINVAL; + if (!amdgpu_ras_intr_triggered()) { + ret = psp_ras_enable_features(&adev->psp, &info, enable); + if (ret) { + DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; + return -EINVAL; + } } /* setup the obj */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9fe95e7..9c2dba62 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3736,8 +3736,10 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + /* DF freeze and kcq disable will fail */ + if (!amdgpu_ras_intr_triggered()) + /* disable KCQ to avoid CPC touch memory not valid anymore */ + gfx_v9_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/4] drm/amdgpu: reset err_event_athub flag if gpu recovery succeeded
Otherwise next err_event_athub error cannot call gpu reset. Change-Id: I5cd293f30f23876bf2a1860681bcb50f47713ecd Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 676cad1..51d74bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4089,6 +4089,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } } + if (!r && in_ras_intr) + atomic_set(&amdgpu_ras_in_intr, 0); + skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/4] drm/amdgpu/soc15: disable doorbell interrupt as part of BACO entry sequence
Workaround to make RAS recovery work in BACO reset. Change-Id: I4e4a81f719dcc88dfd49f583c4be3a373b5eab2c Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 ++ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 8 drivers/gpu/drm/amd/amdgpu/soc15.c | 9 + 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 1f26a17..919bd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -67,6 +67,8 @@ struct amdgpu_nbio_funcs { bool enable); void (*ih_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index); + void (*enable_doorbell_interrupt)(struct amdgpu_device *adev, + bool enable); void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 238c248..0db458f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -502,6 +502,13 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, } } +static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); +} + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, @@ -516,6 +523,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, + .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt, .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, .get_clockgating_state = nbio_v7_4_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fc6cfbc..5cf5f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -493,10 +493,15 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) { void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) return -ENOENT; + /* avoid NBIF got stuck when do RAS recovery in BACO reset */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + /* enter BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 1)) return -EIO; @@ -505,6 +510,10 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) if (pp_funcs->set_asic_baco_state(pp_handle, 0)) return -EIO; + /* re-enable doorbell interrupt after BACO exit */ + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + dev_info(adev->dev, "GPU BACO reset\n"); adev->in_baco_reset = 1; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/4] drm/amd/powerplay: send EnterBaco msg with argument as RAS recovery flag
1 indicates RAS recovery flag in SMU FW. Change-Id: Icb8c14586fca1b8ae443bbde764570a9e41850fa Signed-off-by: Le Ma --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index b068d1c..9b5e72b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -89,10 +89,15 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); data |= 0x8000; WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); - } - if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) - return -EINVAL; + if(smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_EnterBaco, 0)) + return -EINVAL; + } else { + if(smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_EnterBaco, 1)) + return -EINVAL; + } } else if (state == BACO_STATE_OUT) { if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco)) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/4] drm/amd/powerplay: avoid disabling ECC if RAS is enabled for VEGA20
Program THM_BACO_CNTL.SOC_DOMAIN_IDLE=1 will tell VBIOS to disable ECC when BACO exit. This can save BACO exit time by PSP on none-ECC SKU. Drop the setting for ECC supported SKU. Change-Id: I2a82c128fa5e9731b886dd61f1273dc48ea1923c Signed-off-by: Le Ma --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index df6ff92..b068d1c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -29,7 +29,7 @@ #include "vega20_baco.h" #include "vega20_smumgr.h" - +#include "amdgpu_ras.h" static const struct soc15_baco_cmd_entry clean_baco_tbl[] = { @@ -74,6 +74,7 @@ int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); enum BACO_STATE cur_state; uint32_t data; @@ -84,10 +85,11 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) return 0; if (state == BACO_STATE_IN) { - data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); - data |= 0x8000; - WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); - + if (!ras || !ras->supported) { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + data |= 0x8000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + } if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) return -EINVAL; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amd/powerplay: add BACO platformCaps for VEGA20
BACO reset is needed for RAS recovery. Change-Id: I8207fc314744468c89ba4a030cb2bb15b082aac7 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 6629c475..3d3c647 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -183,6 +183,9 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_TablelessHardwareInterface); phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_BACO); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EnableSMU7ThermalManagement); if (adev->pg_flags & AMD_PG_SUPPORT_UVD) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: correct condition check for psp rlc autoload
Change-Id: Ia91d0fb7179f6944214e892f370d7ef3d6b7d30e Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d359f1d..2aa1ae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1080,7 +1080,8 @@ static int psp_np_fw_load(struct psp_context *psp) return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: add command id in psp response failure message
From: Hawking Zhang Change-Id: I88649fc5dbc7376f3c90ec2114236294ca9189de Signed-off-by: Hawking Zhang Reviewed-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f90a0cd..d359f1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -168,8 +168,9 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (0x%X)\n", - psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); + DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n", +psp->cmd_buf_mem->cmd_id, +psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amdgpu: add psp ip block for Arcturus
Change-Id: I6b69bfba66aa12d5486527e29a7c322336c95dd5 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 5bec851..dbd790e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -755,6 +755,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdgpu: disable vcn ip block for front door loading on Arcturus
Change-Id: Ibf137cd57659e70516bcbbe456a00ad77e60647c Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7c7e9f5..5bec851 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -760,7 +760,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); break; case CHIP_RENOIR: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdgpu: enable psp front door loading by default on Arcturus
Change-Id: I13a5f590d5a49655965a13eb7ce773d1efffcbd0 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 82f6b41..fce1f71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -360,6 +360,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: case CHIP_RENOIR: case CHIP_NAVI10: case CHIP_NAVI14: @@ -368,8 +369,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - case CHIP_ARCTURUS: - return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/4] drm/amdgpu: increase CGCG gfx idle threshold for Arcturus
Follow the hw spec, and no need to consider gfxoff on Arcturus Change-Id: Ib9cad79b1b9c096014447fc0a7d29cdb594e15e3 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 78150ff..9b85a73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4677,8 +4677,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* enable cgcg FSM(0x363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->asic_type == CHIP_ARCTURUS) + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + else + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu/powerplay: update Arcturus smu version in new place
Follow patch below: drm/amd/powerplay: re-define smu interface version for smu v11 Change-Id: Id78651209adc7a094f4c19ba965dcded37dd3ba7 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 1 - drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h | 2 +- drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h| 1 + drivers/gpu/drm/amd/powerplay/smu_v11_0.c| 3 +++ 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index cff3777..e6fcbdf 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1918,6 +1918,5 @@ void arcturus_set_ppt_funcs(struct smu_context *smu) struct smu_table_context *smu_table = &smu->smu_table; smu->ppt_funcs = &arcturus_ppt_funcs; - smu->smc_if_version = SMU11_DRIVER_IF_VERSION; smu_table->table_count = TABLE_COUNT; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h index c7a7953..b99e98c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU11_DRIVER_IF_VERSION 0x08 +//#define SMU11_DRIVER_IF_VERSION 0x08 #define PPTABLE_ARCTURUS_SMU_VERSION 4 diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index ee8542d..acbb83d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -27,6 +27,7 @@ #define SMU11_DRIVER_IF_VERSION_INV 0x #define SMU11_DRIVER_IF_VERSION_VG20 0x13 +#define SMU11_DRIVER_IF_VERSION_ARCT 0x08 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 91dfae1..3b8e58e 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -275,6 +275,9 @@ static int smu_v11_0_check_fw_version(struct smu_context *smu) case CHIP_VEGA20: smu->smc_if_version = SMU11_DRIVER_IF_VERSION_VG20; break; + case CHIP_ARCTURUS: + smu->smc_if_version = SMU11_DRIVER_IF_VERSION_ARCT; + break; case CHIP_NAVI10: smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV10; break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amdgpu: enable mmhub clock gating for Arcturus
Init MC_MGCG/LS flag. Also apply to athub CG. Change-Id: Ic00cb8e6d69eb75dd32f34f778352cee93063ee0 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 1 - drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index e52e4d1..0cf7ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -615,7 +615,6 @@ int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, return 0; } -/* TODO: get 2 mmhub instances CG state */ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index aecba1c..235cb5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1126,7 +1126,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_MGCG | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/4] drm/amdgpu: add GFX_CP_LS flag to Arcturus
Missed AMD_CG_SUPPORT_GFX_CP_LS accidently when commit patch before drm/amdgpu: enable gfx clock gating for Arcturus Change-Id: I9d70319dd07f7d642416cb260f9f5b3342b6f3f2 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 261493a..aecba1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1122,6 +1122,7 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_HDP_MGCG | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/4] drm/amdgpu: add mmhub clock gating for Arcturus
Add 2 mmhub instances CG Change-Id: I76ab7a50cd9a40de3022f733787b42e4e5c4dbf5 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 126 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h | 3 + 3 files changed, 135 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cccb6e9..44ac122 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1465,9 +1465,9 @@ static int gmc_v9_0_set_clockgating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->asic_type == CHIP_ARCTURUS) - return 0; - - mmhub_v1_0_set_clockgating(adev, state); + mmhub_v9_4_set_clockgating(adev, state); + else + mmhub_v1_0_set_clockgating(adev, state); athub_v1_0_set_clockgating(adev, state); @@ -1479,9 +1479,9 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->asic_type == CHIP_ARCTURUS) - return; - - mmhub_v1_0_get_clockgating(adev, flags); + mmhub_v9_4_get_clockgating(adev, flags); + else + mmhub_v1_0_get_clockgating(adev, flags); athub_v1_0_get_clockgating(adev, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 33b0de5..e52e4d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -515,3 +515,129 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) i * MMHUB_INSTANCE_REGISTER_OFFSET; } } + +static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + int i, j; + int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + + for (j = 0; j < 5; j++) { + def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist); + if (enable && + (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data1 &= + ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data1 |= + (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def1 != data1) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist, data1); + + if (i == 1 && j == 3) + break; + } + } +} + +static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + int i; + + for (i = 0;
[PATCH 1/1] drm/amdgpu: split athub clock gating from mmhub
Untie the bind of get/set athub CG state from mmhub, for cosmetic fix and Asic not using mmhub 1.0. Besides, also fix wrong athub CG state in amdgpu_pm_info. Change-Id: I4ba970cae558ad5163e93fa9bc77f589196a22b1 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/athub_v1_0.c | 103 drivers/gpu/drm/amd/amdgpu/athub_v1_0.h | 30 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 55 - 5 files changed, 154 insertions(+), 44 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/athub_v1_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/athub_v1_0.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8afa0bc..464bfe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -154,6 +154,7 @@ amdgpu-y += \ # add ATHUB block amdgpu-y += \ + athub_v1_0.o \ athub_v2_0.o # add amdkfd interfaces diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c new file mode 100644 index 000..d9cc746 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "athub_v1_0.h" + +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ +
[PATCH 4/9] drm/amdgpu: enable hdp clock gating for Arcturus
Init hdp MGCG/LS flag as Vega20 Change-Id: Ia33ca064f79ac409c53d3beb6f01b6e814a92041 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4fbaca3..6038dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1017,7 +1017,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | - AMD_CG_SUPPORT_GFX_CP_LS; + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/9] drm/amdgpu: add hdp clock gating for Arcturus
Add hdp CGLS for Arcturus in set common clockgating function Change-Id: I44e392fa5f7653908b36b0902e721d56eed3eb92 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 00758be..4fbaca3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1157,7 +1157,8 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable { uint32_t def, data; - if (adev->asic_type == CHIP_VEGA20) { + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -1289,6 +1290,10 @@ static int soc15_common_set_clockgating_state(void *handle, soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; + case CHIP_ARCTURUS: + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; default: break; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/9] drm/amdgpu: enable gfx clock gating for Arcturus
Init gfx MGCG/LS, CGCG/LS, CP_LS flag. Change-Id: I88db76d1b8f2b2cecce10846a4d22eec638eea8a Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 15f6356..00758be 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1013,7 +1013,11 @@ static int soc15_common_early_init(void *handle) break; case CHIP_ARCTURUS: adev->asic_funcs = &vega20_asic_funcs; - adev->cg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/9] drm/amdgpu: add sdma clock gating for Arcturus
Add ARCTURUS case in sdma set clockgating function Change-Id: I65a3d99a140a8a76949b4d03c20bc6e0195c9854 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9e3c63c..185dff0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2157,6 +2157,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); sdma_v4_0_update_medium_grain_light_sleep(adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 7/9] drm/amdgpu: enable sdma clock gating for Arcturus
Init sdma MGCG/LS flag Change-Id: I600b8c67b1dfa74240269f2f028960b2c93a0ec2 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6038dce..ad64975 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1019,7 +1019,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_HDP_MGCG | - AMD_CG_SUPPORT_HDP_LS; + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/9] drm/amdgpu: add gfx clock gating for Arcturus
Add ARCTURUS case in gfx set clockgating function. No 3d clock on Arcturus. Change-Id: I9893a2afea7f0b5d433baa14f48ae55a36516fac Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fdd90c1..de3de1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4218,6 +4218,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (adev->asic_type == CHIP_ARCTURUS) + return; + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ @@ -4410,6 +4413,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/9] drm/amdgpu: support sdma clock gating for more instances
Shorten the code with RREG32_SDMA/WREG32_SDMA macro in CG part. Change-Id: Icbf94169bb703877b105a307f14c708609faaae4 Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 105 +++-- 1 file changed, 34 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7acf947..9e3c63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2084,61 +2084,35 @@ static void sdma_v4_0_update_medium_grain_clock_gating( bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { - /* enable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } else { - /* disable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | -SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | -SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | +SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | +
[PATCH libdrm 1/1] tests/amdgpu: add the missing deactivation case for dispatch test
Change-Id: I502cc5fde7f00e41d496bfba0963d4db20459e00 Signed-off-by: Le Ma --- tests/amdgpu/amdgpu_test.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c index a4ce8ce..dc54155 100644 --- a/tests/amdgpu/amdgpu_test.c +++ b/tests/amdgpu/amdgpu_test.c @@ -472,9 +472,12 @@ static void amdgpu_disable_suites() fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); /* This test was ran on GFX9 only */ - if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) - if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test", CU_FALSE)) + if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) { + if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test (GFX)", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + if (amdgpu_set_test_active(BASIC_TESTS_STR, "Dispatch Test (Compute)", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + } /* This test was ran on GFX9 only */ if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH libdrm 1/1 v2] tests/amdgpu: divide dispatch test into compute and gfx
for better clarification v2: accordingly change dispatch_test caller in gpu_reset test Change-Id: I245d760d5f9d64eb10b137d5ce375ef52a4d873a Signed-off-by: Le Ma Reviewed-by: Flora Cui --- tests/amdgpu/basic_tests.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 938106e..ab2a672 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -55,7 +55,8 @@ static void amdgpu_userptr_test(void); static void amdgpu_semaphore_test(void); static void amdgpu_sync_dependency_test(void); static void amdgpu_bo_eviction_test(void); -static void amdgpu_dispatch_test(void); +static void amdgpu_compute_dispatch_test(void); +static void amdgpu_gfx_dispatch_test(void); static void amdgpu_draw_test(void); static void amdgpu_gpu_reset_test(void); @@ -79,7 +80,8 @@ CU_TestInfo basic_tests[] = { { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, { "SW semaphore Test", amdgpu_semaphore_test }, { "Sync dependency Test", amdgpu_sync_dependency_test }, - { "Dispatch Test", amdgpu_dispatch_test }, + { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, + { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, { "Draw Test", amdgpu_draw_test }, { "GPU reset Test", amdgpu_gpu_reset_test }, CU_TEST_INFO_NULL, @@ -2448,7 +2450,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, r = amdgpu_cs_ctx_free(context_handle); CU_ASSERT_EQUAL(r, 0); } -static void amdgpu_dispatch_test(void) + +static void amdgpu_compute_dispatch_test(void) { int r; struct drm_amdgpu_info_hw_ip info; @@ -2463,6 +2466,13 @@ static void amdgpu_dispatch_test(void) amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); } +} + +static void amdgpu_gfx_dispatch_test(void) +{ + int r; + struct drm_amdgpu_info_hw_ip info; + uint32_t ring_id; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); @@ -3170,5 +3180,6 @@ static void amdgpu_gpu_reset_test(void) r = amdgpu_cs_ctx_free(context_handle); CU_ASSERT_EQUAL(r, 0); - amdgpu_dispatch_test(); + amdgpu_compute_dispatch_test(); + amdgpu_gfx_dispatch_test(); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH libdrm 1/1] tests/amdgpu: divide dispatch test into compute and gfx
for better clarification Change-Id: I245d760d5f9d64eb10b137d5ce375ef52a4d873a Signed-off-by: Le Ma --- tests/amdgpu/basic_tests.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 938106e..fa0f568 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -55,7 +55,8 @@ static void amdgpu_userptr_test(void); static void amdgpu_semaphore_test(void); static void amdgpu_sync_dependency_test(void); static void amdgpu_bo_eviction_test(void); -static void amdgpu_dispatch_test(void); +static void amdgpu_compute_dispatch_test(void); +static void amdgpu_gfx_dispatch_test(void); static void amdgpu_draw_test(void); static void amdgpu_gpu_reset_test(void); @@ -79,7 +80,8 @@ CU_TestInfo basic_tests[] = { { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, { "SW semaphore Test", amdgpu_semaphore_test }, { "Sync dependency Test", amdgpu_sync_dependency_test }, - { "Dispatch Test", amdgpu_dispatch_test }, + { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, + { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, { "Draw Test", amdgpu_draw_test }, { "GPU reset Test", amdgpu_gpu_reset_test }, CU_TEST_INFO_NULL, @@ -2448,7 +2450,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, r = amdgpu_cs_ctx_free(context_handle); CU_ASSERT_EQUAL(r, 0); } -static void amdgpu_dispatch_test(void) + +static void amdgpu_compute_dispatch_test(void) { int r; struct drm_amdgpu_info_hw_ip info; @@ -2463,6 +2466,13 @@ static void amdgpu_dispatch_test(void) amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); } +} + +static void amdgpu_gfx_dispatch_test(void) +{ + int r; + struct drm_amdgpu_info_hw_ip info; + uint32_t ring_id; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: remove unnecessary rlc reset function on gfx9
From: Le Ma The rlc reset function is not necessary during gfx9 initialization/resume phase. And this function would even cause rlc fw loading failed on some gfx9 ASIC. Remove this function safely with verification well on Vega/Raven platform. Change-Id: I38bf0bbaf7183b7e6a53a1b63dba770de8e47d9e Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3765d97..3c936b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2455,8 +2455,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - adev->gfx.rlc.funcs->reset(adev); - gfx_v9_0_init_pg(adev); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx