Use BACO to reset the GPU if supported on SMU7-based dGPUs. v2: don't use BACO on CI parts.
Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/cik.c | 48 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/cik.h | 3 ++ drivers/gpu/drm/amd/amdgpu/vi.c | 84 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/vi.h | 3 ++ 4 files changed, 128 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index b81bb414fcb3..fc8b34480f66 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1270,15 +1270,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) } /** - * cik_asic_reset - soft reset GPU + * cik_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int cik_asic_reset(struct amdgpu_device *adev) +static int cik_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -1294,7 +1294,45 @@ static int cik_asic_reset(struct amdgpu_device *adev) static enum amd_reset_method cik_asic_reset_method(struct amdgpu_device *adev) { - return AMD_RESET_METHOD_LEGACY; + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + /* disable baco reset until it works */ + /* smu7_asic_get_baco_capability(adev, &baco_reset); */ + baco_reset = false; + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * cik_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. 
+ */ +static int cik_asic_reset(struct amdgpu_device *adev) +{ + int r; + + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + r = smu7_asic_baco_reset(adev); + else + r = cik_asic_pci_config_reset(adev); + + return r; } static u32 cik_get_config_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 54c625a2e570..9870bf27870e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,4 +31,7 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); +int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); +int smu7_asic_baco_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5f8c8786cac5..78e5cdc0c058 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -689,16 +689,50 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } +int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { + *cap = false; + return -ENOENT; + } + + return pp_funcs->get_asic_baco_capability(pp_handle, cap); +} + +int smu7_asic_baco_reset(struct amdgpu_device *adev) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 1)) + return -EIO; + + /* exit BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 0)) + return -EIO; + + dev_info(adev->dev, "GPU BACO reset\n"); + + return 0; +} + /** - * 
vi_asic_reset - soft reset GPU + * vi_asic_pci_config_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Look up which blocks are hung and attempt - * to reset them. + * Use PCI Config method to reset the GPU. + * * Returns 0 for success. */ -static int vi_asic_reset(struct amdgpu_device *adev) +static int vi_asic_pci_config_reset(struct amdgpu_device *adev) { int r; @@ -714,7 +748,47 @@ static int vi_asic_reset(struct amdgpu_device *adev) static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - return AMD_RESET_METHOD_LEGACY; + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + smu7_asic_get_baco_capability(adev, &baco_reset); + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_LEGACY; +} + +/** + * vi_asic_reset - soft reset GPU + * + * @adev: amdgpu_device pointer + * + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. 
+ */ +static int vi_asic_reset(struct amdgpu_device *adev) +{ + int r; + + if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + r = smu7_asic_baco_reset(adev); + else + r = vi_asic_pci_config_reset(adev); + + return r; } static u32 vi_get_config_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 8de0772f986c..40d4174913a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,4 +31,7 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); +int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); +int smu7_asic_baco_reset(struct amdgpu_device *adev); + #endif -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx