Re: [PATCH 06/13] drm/amdgpu: use the new drm_exec object for CS v2
On 6/20/23 13:07, Tatsuyuki Ishi wrote: @@ -1296,9 +1271,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, */ r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm, + e->range); e->range = NULL; e->range = NULL; needs to be removed, or it's causing *massive* memory leaks. Actually, I quoted the wrong hunk, the correct one is below. @@ -928,18 +874,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->user_invalidated = userpage_invalidated; } - r = ttm_eu_reserve_buffers(>ticket, >validated, true, - ); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto out_free_user_pages; + drm_exec_while_not_all_locked(>exec) { + r = amdgpu_vm_lock_pd(>vm, >exec); + drm_exec_continue_on_contention(>exec); + if (unlikely(r)) + goto out_free_user_pages; + + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + r = drm_exec_prepare_obj(>exec, >bo->tbo.base, 2); + drm_exec_break_on_contention(>exec); + if (unlikely(r)) + goto out_free_user_pages; + + e->bo_va = amdgpu_vm_bo_find(vm, e->bo); + e->range = NULL; This causes the leak. + } + drm_exec_continue_on_contention(>exec); + + if (p->uf_bo) { + r = drm_exec_prepare_obj(>exec, >uf_bo->tbo.base, +2); + drm_exec_continue_on_contention(>exec); + if (unlikely(r)) + goto out_free_user_pages; + } } Tatsuyuki
Re: [PATCH 06/13] drm/amdgpu: use the new drm_exec object for CS v2
+Boris and +Matthew in case you want to take over this patch set. Here are some review / testing comments, including those I posted before to ease tracking. On 5/4/23 20:51, Christian König wrote: Use the new component here as well and remove the old handling. v2: drop dupplicate handling Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 71 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 210 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 - 7 files changed, 115 insertions(+), 204 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e39..eba3e4f01ea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, list->first_userptr = first_userptr; list->num_entries = num_entries; + sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry), +amdgpu_bo_list_entry_cmp, NULL); Previously amdgpu_bo_list_get_list sorted all entries, but this one only sorts userptr entries. I think this changes behavior? @@ -928,18 +874,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->user_invalidated = userpage_invalidated; } - r = ttm_eu_reserve_buffers(>ticket, >validated, true, - ); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto out_free_user_pages; + drm_exec_while_not_all_locked(>exec) { + r = amdgpu_vm_lock_pd(>vm, >exec); + drm_exec_continue_on_contention(>exec); Duplicate handling is needed for pretty much every call of amdgpu_vm_lock_pd, as bo->tbo.base.resv == vm->root.bo->tbo.base.resv for AMDGPU_GEM_CREATE_VM_ALWAYS_VALID. I think Boris's suggestion of having this through a common DRM_EXEC_FLAG_ALLOW_DUPLICATES flag fits well. + if (unlikely(r)) + goto out_free_user_pages; + + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + r = drm_exec_prepare_obj(>exec, >bo->tbo.base, 2); Previously there were comments for how the fence count is calculated, now they seem to be removed. I'd prefer if they were properly retained as finding out who calls drm_resv_add_fence is not trivial, and wrong reservation count can also be really hard to debug. Likewise for amdgpu_vm_lock_pd (which was added in another commit). + drm_exec_break_on_contention(>exec); + if (unlikely(r)) + goto out_free_user_pages; + + e->bo_va = amdgpu_vm_bo_find(vm, e->bo); + e->range = NULL; + } + drm_exec_continue_on_contention(>exec); + + if (p->uf_bo) { + r = drm_exec_prepare_obj(>exec, >uf_bo->tbo.base, +2); + drm_exec_continue_on_contention(>exec); + if (unlikely(r)) + goto out_free_user_pages; + } } - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct mm_struct *usermm; - e->bo_va = amdgpu_vm_bo_find(vm, bo); + usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm); + if (usermm && usermm != current->mm) { + r = -EPERM; + goto out_free_user_pages; + } + + if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && + e->user_invalidated && e->user_pages) { + amdgpu_bo_placement_from_domain(e->bo, + AMDGPU_GEM_DOMAIN_CPU); + r = ttm_bo_validate(>bo->tbo, >bo->placement, + ); + if (r) + goto out_free_user_pages; + + amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, +e->user_pages); + } + + kvfree(e->user_pages); + e->user_pages = NULL; } amdgpu_cs_get_threshold_for_moves(p->adev, >bytes_moved_threshold, @@ -1296,9 +1271,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, */ r = 0;
[PATCH] drm/amd/pm: add abnormal fan detection for smu 13.0.0
add abnormal fan detection for smu 13.0.0 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a6083957ae51..124287cbbff8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1710,6 +1710,7 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } -- 2.34.1
Re: [PATCH v6 2/8] PCI/VGA: Deal only with VGA class devices
Hi, On 2023/6/20 02:12, Limonciello, Mario wrote: On 6/12/2023 2:25 PM, Sui Jingfeng wrote: From: Sui Jingfeng Deal only with the VGA devcie(pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non-PCI display device(pdev->class != 0x0300) out. There no need to process the non-display PCI device. Signed-off-by: Sui Jingfeng --- This also means that deleting a PCI device no longer needs to walk the list. Reviewed-by: Mario Limonciello Thanks a lot, can you help to resend this precious R-B to the V7 of this series [1], This is V6. [1] https://patchwork.freedesktop.org/series/119250/ drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index c1bc6c983932..22a505e877dc 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) + return 0; /* For now we're only intereted in devices added and removed. I didn't * test this thing here, so someone needs to double check for the @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); + if (notify) vga_arbiter_notify_clients(); return 0; @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = { static int __init vga_arb_device_init(void) { + struct pci_dev *pdev = NULL; int rc; - struct pci_dev *pdev; rc = misc_register(_arb_device); if (rc < 0) @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) /* We add all PCI devices satisfying VGA class in the arbiter by * default */ - pdev = NULL; - while ((pdev = - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_ANY_ID, pdev)) != NULL) + while (1) { + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev); + if (!pdev) + break; + vga_arbiter_add_pci_device(pdev); + } pr_info("loaded\n"); return rc; -- Jingfeng
Re: [PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
On 2023-06-19 17:28, Xiaogang.Chen wrote: From: Xiaogang Chen Since we allow kfd and graphic operate on same GPU VM to have interoperation between them GPU VM may have been used by graphic vm operations before kfd turns a GPU VM into a compute VM. Remove vm clean checking at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling As discussed, we can follow this up with a change that enables ATS for graphics VMs as well, so we don't need to enable ATS in amdgpu_vm_make_compute. This would improve interop for Raven. We only enable ATS for the lower half of the address space, so it should not affect graphics client that use the upper half. Thanks, Felix --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..291977b93b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2245,16 +2245,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { + /* Sanity checks */ + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; + goto unreserve_bo; + } + vm->pte_support_ats = pte_support_ats; r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false);
[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
From: Xiaogang Chen Since we allow kfd and graphic operate on same GPU VM to have interoperation between them GPU VM may have been used by graphic vm operations before kfd turns a GPU VM into a compute VM. Remove vm clean checking at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..291977b93b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2245,16 +2245,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { + /* Sanity checks */ + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; + goto unreserve_bo; + } + vm->pte_support_ats = pte_support_ats; r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); -- 2.25.1
Re: [PATCH] drm/amd: Disable PSR-SU on Parade 0803 TCON
On 6/19/23 16:13, Mario Limonciello wrote: A number of users have reported that there are random hangs occurring caused by PSR-SU specifically on panels that contain the parade 0803 TCON. Users have been able to work around the issue by disabling PSR entirely. To avoid these hangs, disable PSR-SU when this TCON is found. Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Suggested-by: Tsung-hua (Ryan) Lin Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2443 Signed-off-by: Mario Limonciello Reviewed-by: Hamza Mahfooz --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 30349881a283..b9e78451a3d5 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } -- Hamza
[PATCH] drm/amd: Disable PSR-SU on Parade 0803 TCON
A number of users have reported that there are random hangs occurring caused by PSR-SU specifically on panels that contain the parade 0803 TCON. Users have been able to work around the issue by disabling PSR entirely. To avoid these hangs, disable PSR-SU when this TCON is found. Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Suggested-by: Tsung-hua (Ryan) Lin Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2443 Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 30349881a283..b9e78451a3d5 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } -- 2.34.1
Re: [PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
On 2023-06-19 15:06, Xiaogang.Chen wrote: From: Xiaogang Chen Since we allow kfd and graphic operate on same GPU VM to have interoperation between them GPU VM may have been used by graphic vm operations before kfd turn a GFX VM into a compute VM. Remove vm clean checking at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..33f05297ab7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2246,7 +2246,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + if (pte_support_ats && !amdgpu_vm_pt_is_root_clean(adev, vm)) { I think the correct condition here would be "pte_support_ats != vm->pte_support_ats", because that's what's used to reinitialize the page table just below. I think it would be even cleaner if you moved that check inside the "if (pte_support_ats != vm->pte_support_ats)" block below. Regards, Felix r = -EINVAL; goto unreserve_bo; }
[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute
From: Xiaogang Chen Since we allow kfd and graphic operate on same GPU VM to have interoperation between them GPU VM may have been used by graphic vm operations before kfd turn a GFX VM into a compute VM. Remove vm clean checking at amdgpu_vm_make_compute. Signed-off-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..33f05297ab7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2246,7 +2246,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + if (pte_support_ats && !amdgpu_vm_pt_is_root_clean(adev, vm)) { r = -EINVAL; goto unreserve_bo; } -- 2.25.1
Re: [PATCH v6 2/8] PCI/VGA: Deal only with VGA class devices
On 6/12/2023 2:25 PM, Sui Jingfeng wrote: From: Sui Jingfeng Deal only with the VGA devcie(pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non-PCI display device(pdev->class != 0x0300) out. There no need to process the non-display PCI device. Signed-off-by: Sui Jingfeng --- This also means that deleting a PCI device no longer needs to walk the list. Reviewed-by: Mario Limonciello drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index c1bc6c983932..22a505e877dc 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) + return 0; /* For now we're only intereted in devices added and removed. I didn't * test this thing here, so someone needs to double check for the @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); + if (notify) vga_arbiter_notify_clients(); return 0; @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = { static int __init vga_arb_device_init(void) { + struct pci_dev *pdev = NULL; int rc; - struct pci_dev *pdev; rc = misc_register(_arb_device); if (rc < 0) @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) /* We add all PCI devices satisfying VGA class in the arbiter by * default */ - pdev = NULL; - while ((pdev = - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_ANY_ID, pdev)) != NULL) + while (1) { + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev); + if (!pdev) + break; + vga_arbiter_add_pci_device(pdev); + } pr_info("loaded\n"); return rc;
[PATCHv4] drm/amdgpu: Update invalid PTE flag setting
Update the invalid PTE flag setting with TF enabled. This is to ensure, in addition to transitioning the retry fault to a no-retry fault, it also causes the wavefront to enter the trap handler. With the current setting, the fault only transitions to a no-retry fault. Additionally, have 2 sets of invalid PTE settings, one for TF enabled, the other for TF disabled. The setting with TF disabled, doesn't work with TF enabled. Signed-off-by: Mukul Joshi --- v1->v2: - Update handling according to Christian's feedback. v2->v3: - Remove ASIC specific callback (Felix). v3->v4: - Add noretry flag to amdgpu->gmc. This allows to set ASIC specific flags. drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 31 +++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 1 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c| 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 + 9 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 56d73fade568..fdc25cd559b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -331,6 +331,8 @@ struct amdgpu_gmc { u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; u64 MC_VM_MX_L1_TLB_CNTL; + + u64 noretry_flags; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eff73c428b12..8c7861a4d75d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2604,7 +2604,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ - flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT; + flags = AMDGPU_VM_NORETRY_FLAGS; value = 0; } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9c85d494f2a2..b81fcb962d8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -84,7 +84,13 @@ struct amdgpu_mem_stats; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) +/* Flag combination to set no-retry with TF disabled */ +#define AMDGPU_VM_NORETRY_FLAGS(AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \ + AMDGPU_PTE_TF) +/* Flag combination to set no-retry with TF enabled */ +#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \ + AMDGPU_PTE_PRT) /* For GFX9 */ #define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57) #define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index dea1a64be44d..24ddf6a0512a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -778,6 +778,27 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, 1, 0, flags); } +/** + * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags + * + * @adev - amdgpu_device pointer + * @flags: pointer to PTE flags + * + * Update PTE no-retry flags when TF is enabled. + */ +static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev, + uint64_t *flags) +{ + /* +* Update no-retry flags with the corresponding TF +* no-retry combination. +*/ + if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) { + *flags &= ~AMDGPU_VM_NORETRY_FLAGS; + *flags |= adev->gmc.noretry_flags; + } +} + /* * amdgpu_vm_pte_update_flags - figure out flags for PTE updates * @@ -804,6 +825,16 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } + /* +* Update no-retry flags to use the no-retry flag combination +* with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination +* does not work when TF is enabled. So, replace them with +* AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for +* all cases. +*/ + if (level ==
Re: [PATCH V3 4/7] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature
On 6/16/2023 12:27 PM, Evan Quan wrote: With WBRF feature supported, as a driver responding to the frequencies, amdgpu driver is able to do shadow pstate switching to mitigate possible interference(between its (G-)DDR memory clocks and local radio module frequency bands used by Wifi 6/6e/7). To make WBRF feature functional, the kernel needs to be configured with CONFIG_ACPI_WBRF and the platform is equipped with necessary ACPI based mechanism to get amdgpu driver notified. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 63 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 19 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 184 ++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 20 ++ drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 + 6 files changed, 315 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e39..2f2ec64ed1b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -241,6 +242,7 @@ extern int amdgpu_num_kcq; #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024) extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern int amdgpu_wbrf; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -741,6 +743,9 @@ struct amdgpu_reset_domain; */ #define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size) +typedef +void (*wbrf_notify_handler) (struct amdgpu_device *adev); + struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1050,6 +1055,8 @@ struct amdgpu_device { booljob_hang; booldc_enabled; + + wbrf_notify_handler wbrf_event_handler; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1381,6 +1388,25 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) { return 0; } #endif +#if defined(CONFIG_ACPI_WBRF) +bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev); +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out); +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler); +int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev); +#else +static inline bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev) { return false; } +static inline +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out) { return 0; } +static inline +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler) { return 0; } +static inline +int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev) { return 0; } +#endif + #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index aeeec211861c..efbe6dd91d1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1105,3 +1105,66 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) } #endif /* CONFIG_SUSPEND */ + +#ifdef CONFIG_ACPI_WBRF +bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev) +{ + struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev); + + if (!acpi_dev) + return false; + + return wbrf_supported_consumer(acpi_dev); +} + +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out) +{ + struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev); + + if (!acpi_dev) + return -ENODEV; + + return wbrf_retrieve_exclusions(acpi_dev, exclusions_out); +} + +#define CPM_GPU_NOTIFY_COMMAND 0x55 +static void amdgpu_acpi_wbrf_event(acpi_handle handle, u32 event, void *data) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + + if (event == CPM_GPU_NOTIFY_COMMAND && + adev->wbrf_event_handler) + adev->wbrf_event_handler(adev); > +} + +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler)
Re: [PATCH] drm/amdgpu: Remove struct drm_driver.gem_prime_mmap
Hi Christian Am 19.06.23 um 16:13 schrieb Christian König: Am 19.06.23 um 16:11 schrieb Thomas Zimmermann: The callback struct drm_driver.gem_prime_mmap as been removed in commit 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap"). Do not assign to it. The assigned function, drm_gem_prime_mmap(), is now the default for the operation, so there is no change in functionality. Signed-off-by: Thomas Zimmermann Fixes: 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap") Cc: Thomas Zimmermann Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org Reviewed-by: Christian König Thanks for the quick response. I'll add the patch to drm-misc-next immediately, to make the tree's amdgpu build again. Best regards Thomas --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 43613569801b6..07e16ad465d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2879,7 +2879,6 @@ const struct drm_driver amdgpu_partition_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Frankenstrasse 146, 90461 Nuernberg, Germany GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman HRB 36809 (AG Nuernberg) OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH] drm/amdgpu: Remove struct drm_driver.gem_prime_mmap
Am 19.06.23 um 16:11 schrieb Thomas Zimmermann: The callback struct drm_driver.gem_prime_mmap as been removed in commit 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap"). Do not assign to it. The assigned function, drm_gem_prime_mmap(), is now the default for the operation, so there is no change in functionality. Signed-off-by: Thomas Zimmermann Fixes: 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap") Cc: Thomas Zimmermann Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 43613569801b6..07e16ad465d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2879,7 +2879,6 @@ const struct drm_driver amdgpu_partition_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC,
[PATCH] drm/amdgpu: Remove struct drm_driver.gem_prime_mmap
The callback struct drm_driver.gem_prime_mmap as been removed in commit 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap"). Do not assign to it. The assigned function, drm_gem_prime_mmap(), is now the default for the operation, so there is no change in functionality. Signed-off-by: Thomas Zimmermann Fixes: 0adec22702d4 ("drm: Remove struct drm_driver.gem_prime_mmap") Cc: Thomas Zimmermann Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Cc: dri-de...@lists.freedesktop.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 43613569801b6..07e16ad465d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2879,7 +2879,6 @@ const struct drm_driver amdgpu_partition_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, -- 2.41.0
Re: Warning appeared after c8b5a95 ("drm/amdgpu: Fix desktop freezed after gpu-reset")
On Mon, Jun 19, 2023 at 9:05 AM Christian Kastner wrote: > > Hi, > > On a Debian 12 ("bookworm") system, I observed a new warning when I > upgraded from kernel 6.1.25 to 6.1.27. This is on a system with an RX > 6800 XT GPU and 3500X processor. > > I've traced it down to commit c8b5a95 ("drm/amdgpu: Fix desktop freezed > after gpu-reset"). Rebuilding the 6.1.27 kernel without this change > makes the warning disappear. > > I can reliably trigger this (and another) warning with > > $ sudo cat /sys/kernel/debug/dri/0/amdgpu_test_ib > run ib test: > ib ring tests passed. > > 5 or 6 seconds after this, two warnings are printed. I see these same > two warnings on system shutdown (or, at least, they looked similar > enough to the above that I didn't check for identity). > > I've attached > (1) the dmesg output after modprobe'ing amdgpu > (2) the dmesg output after triggering amdgpu_test_ib > > The system in question is only used for ROCm development. I haven't > observed any other side effects there, other than the warning. There's > no monitor attached. So I can't speak to the effect of a desktop freeze. The warnings are harmless, but they have been fixed[1] and the fixes are making their way back to stable kernels. [1] - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=08c677cb0b436a96a836792bb35a8ec5de4999c2 Alex
[PATCH Review V2 1/1] drm/amdgpu: Remove redundant poison consumption handler function
The function callback handle_poison_consumption and callback function poison_consumption_handler are almost same to handle poison consumption, remove poison_consumption_handler. Changed from V1: Add handle poison consumption function for VCN2.6, VCN4.0, JPEG2.6 and JPEG4.0, return false when handle VCN/JPEGP poison consumption. Signed-off-by: Stanley.Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 12 +--- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 9 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 9 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c| 9 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c| 9 + 9 files changed, 50 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a33d4bc34cee..c15dbdb2e0f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -840,15 +840,6 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev) return 0; } -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) -{ - if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) - return adev->gfx.ras->poison_consumption_handler(adev, entry); - - return 0; -} - int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index d0c3f2955821..95b80bc8cdb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -273,8 +273,6 @@ struct amdgpu_gfx_ras { int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); - int (*poison_consumption_handler)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); }; struct amdgpu_gfx_shadow_info { @@ -538,8 +536,6 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5b6525d8dace..9ce7c7537751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1668,7 +1668,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj, struct amdgpu_iv_entry *entry) { - bool poison_stat = false; + bool poison_stat = true; struct amdgpu_device *adev = obj->adev; struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1694,15 +1694,13 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) - poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); + poison_stat = block_obj->hw_ops->handle_poison_consumption(adev, entry); /* gpu reset is fallback for failed and default cases */ - if (poison_stat) { + if (poison_stat != true) { dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n", block_obj->ras_comm.name); amdgpu_ras_reset_gpu(adev); - } else { - amdgpu_gfx_poison_consumption_handler(adev, entry); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 46bf1889a9d7..03f3b3774b85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -564,7 +564,8 @@ struct amdgpu_ras_block_hw_ops { void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_status)(struct amdgpu_device *adev); bool (*query_poison_status)(struct amdgpu_device *adev); - bool (*handle_poison_consumption)(struct amdgpu_device *adev); +
Requests For Proposals for hosting XDC 2024 are now open
Hello everyone! The X.org board is soliciting proposals to host XDC in 2024. Since XDC 2023 is being held in Europe this year, we've decided to host in North America. However, the board is open to other locations, especially if there's an interesting co-location with another conference. If you're considering hosting XDC, we've assembled a wiki page with what's generally expected and needed: https://www.x.org/wiki/Events/RFP/ When submitting your proposal, please make sure to include at least the key information about the potential location in question, possible dates along with estimated costs. Proposals can be submitted to board at foundation.x.org until the deadline of *September 17th, 2023*. Additionally, an quirk early heads-up to the board if you're considering hosting would be appreciated, in case we need to adjust the schedule a bit. Also, earlier is better since there generally will be a bit of Q with organizers. And if you just have some questions about what organizing XDC entails, please feel free to chat with previous organizers, or someone from the board. Thanks, Ricardo Garcia, on behalf of X.Org
RE: [PATCH] drm/amdgpu/vcn: Need to pause dpg before stop dpg
[AMD Official Use Only - General] Hi Emily, Do you want to pause or un-pause dpg mode based on and change and commit message? With bare metal, before calling the stop, the state of dpg should be un-paused within the call the of amdgpu_vcn_idle_work_handler, is it not the case for SRIOV? Regards, Leo -Original Message- From: amd-gfx On Behalf Of Emily Deng Sent: Monday, June 19, 2023 6:24 AM To: amd-gfx@lists.freedesktop.org Cc: Deng, Emily Subject: [PATCH] drm/amdgpu/vcn: Need to pause dpg before stop dpg Need to pause dpg first, or it will hit follow error during stop dpg: "[drm] Register(1) [regUVD_POWER_STATUS] failed to reach value 0x0001 != 0xn" Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index b48bb5212488..259795098173 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1424,8 +1424,10 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) */ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v4_0_pause_dpg_mode(adev, inst_idx, ); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); -- 2.36.1
Warning appeared after c8b5a95 ("drm/amdgpu: Fix desktop freezed after gpu-reset")
Hi, On a Debian 12 ("bookworm") system, I observed a new warning when I upgraded from kernel 6.1.25 to 6.1.27. This is on a system with an RX 6800 XT GPU and 3500X processor. I've traced it down to commit c8b5a95 ("drm/amdgpu: Fix desktop freezed after gpu-reset"). Rebuilding the 6.1.27 kernel without this change makes the warning disappear. I can reliably trigger this (and another) warning with $ sudo cat /sys/kernel/debug/dri/0/amdgpu_test_ib run ib test: ib ring tests passed. 5 or 6 seconds after this, two warnings are printed. I see these same two warnings on system shutdown (or, at least, they looked similar enough to the above that I didn't check for identity). I've attached (1) the dmesg output after modprobe'ing amdgpu (2) the dmesg output after triggering amdgpu_test_ib The system in question is only used for ROCm development. I haven't observed any other side effects there, other than the warning. There's no monitor attached. So I can't speak to the effect of a desktop freeze. Best, Christian[ 266.669251] [drm] PCIE GART of 512M enabled (table at 0x0083FEB0). [ 266.669268] [drm] PSP is resuming... [ 266.739148] [drm] reserve 0xa0 from 0x83fd00 for PSP TMR [ 266.876401] amdgpu :09:00.0: amdgpu: SECUREDISPLAY: securedisplay ta ucode is not available [ 266.876404] amdgpu :09:00.0: amdgpu: SMU is resuming... [ 266.876407] amdgpu :09:00.0: amdgpu: smu driver if version = 0x0040, smu fw if version = 0x0041, smu fw program = 0, version = 0x003a5600 (58.86.0) [ 266.876410] amdgpu :09:00.0: amdgpu: SMU driver if version not matched [ 266.876428] amdgpu :09:00.0: amdgpu: dpm has been enabled [ 266.879972] amdgpu :09:00.0: amdgpu: SMU is resumed successfully! [ 266.881457] [drm] DMUB hardware initialized: version=0x02020017 [ 266.904086] [drm] kiq ring mec 2 pipe 1 q 0 [ 266.910932] [drm] VCN decode and encode initialized successfully(under DPG Mode). [ 266.911082] [drm] JPEG decode initialized successfully. [ 266.911104] amdgpu :09:00.0: amdgpu: ring gfx_0.0.0 uses VM inv eng 0 on hub 0 [ 266.911106] amdgpu :09:00.0: amdgpu: ring comp_1.0.0 uses VM inv eng 1 on hub 0 [ 266.911107] amdgpu :09:00.0: amdgpu: ring comp_1.1.0 uses VM inv eng 4 on hub 0 [ 266.911108] amdgpu :09:00.0: amdgpu: ring comp_1.2.0 uses VM inv eng 5 on hub 0 [ 266.911109] amdgpu :09:00.0: amdgpu: ring comp_1.3.0 uses VM inv eng 6 on hub 0 [ 266.90] amdgpu :09:00.0: amdgpu: ring comp_1.0.1 uses VM inv eng 7 on hub 0 [ 266.90] amdgpu :09:00.0: amdgpu: ring comp_1.1.1 uses VM inv eng 8 on hub 0 [ 266.91] amdgpu :09:00.0: amdgpu: ring comp_1.2.1 uses VM inv eng 9 on hub 0 [ 266.92] amdgpu :09:00.0: amdgpu: ring comp_1.3.1 uses VM inv eng 10 on hub 0 [ 266.93] amdgpu :09:00.0: amdgpu: ring kiq_2.1.0 uses VM inv eng 11 on hub 0 [ 266.94] amdgpu :09:00.0: amdgpu: ring sdma0 uses VM inv eng 12 on hub 0 [ 266.95] amdgpu :09:00.0: amdgpu: ring sdma1 uses VM inv eng 13 on hub 0 [ 266.96] amdgpu :09:00.0: amdgpu: ring sdma2 uses VM inv eng 14 on hub 0 [ 266.97] amdgpu :09:00.0: amdgpu: ring sdma3 uses VM inv eng 15 on hub 0 [ 266.97] amdgpu :09:00.0: amdgpu: ring vcn_dec_0 uses VM inv eng 0 on hub 1 [ 266.98] amdgpu :09:00.0: amdgpu: ring vcn_enc_0.0 uses VM inv eng 1 on hub 1 [ 266.99] amdgpu :09:00.0: amdgpu: ring vcn_enc_0.1 uses VM inv eng 4 on hub 1 [ 266.911120] amdgpu :09:00.0: amdgpu: ring vcn_dec_1 uses VM inv eng 5 on hub 1 [ 266.911121] amdgpu :09:00.0: amdgpu: ring vcn_enc_1.0 uses VM inv eng 6 on hub 1 [ 266.911122] amdgpu :09:00.0: amdgpu: ring vcn_enc_1.1 uses VM inv eng 7 on hub 1 [ 266.911123] amdgpu :09:00.0: amdgpu: ring jpeg_dec uses VM inv eng 8 on hub 1 [ 266.916173] amdgpu :09:00.0: [drm] Cannot find any crtc or sizes [ 266.916177] amdgpu :09:00.0: [drm] Cannot find any crtc or sizes [ 272.409887] [ cut here ] [ 272.409891] WARNING: CPU: 1 PID: 259 at drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:656 amdgpu_irq_put+0x45/0x70 [amdgpu] [ 272.410166] Modules linked in: amdgpu gpu_sched drm_buddy drm_display_helper cec rc_core drm_ttm_helper ttm drm_kms_helper i2c_algo_bit ipt_REJECT xt_multiport nft_compat ctr ccm wireguard libchacha20poly1305 chacha_x86_64 poly1305_x86_64 curve25519_x86_64 libcurve25519_generic libchacha ip6_udp_tunnel udp_tunnel nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink overlay binfmt_misc nls_ascii nls_cp437 vfat fat intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd kvm_amd iwlmvm kvm mac80211 snd_hda_codec_realtek irqbypass snd_hda_codec_generic ghash_clmulni_intel snd_hda_codec_hdmi sha512_ssse3 sha512_generic libarc4 snd_hda_intel
[PATCH] gpu: drm/amd: Fix traditional comparison using max method
It would be better to replace the traditional ternary conditional operator with max() Signed-off-by: Li Dong --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index dbd60811f95d..a5eabde53fa4 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1005,8 +1005,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, (stream->timing.h_total * stream->ctx->dc->caps.max_v_total)); } /* Limit minimum refresh rate to what can be supported by hardware */ - min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ? - min_hardware_refresh_in_uhz : in_config->min_refresh_in_uhz; + min_refresh_in_uhz = max(min_hardware_refresh_in_uhz, in_config->min_refresh_in_uhz); max_refresh_in_uhz = in_config->max_refresh_in_uhz; /* Full range may be larger than current video timing, so cap at nominal */ -- 2.31.1.windows.1
Re: [PATCH V3 2/7] wifi: mac80211: Add support for ACPI WBRF
On Sun, 2023-06-18 at 21:17 -0500, Mario Limonciello wrote: > > > --- a/include/net/cfg80211.h > > +++ b/include/net/cfg80211.h > > @@ -920,6 +920,14 @@ const struct cfg80211_chan_def * > > cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, > > const struct cfg80211_chan_def *chandef2); > > > > +/** > > + * nl80211_chan_width_to_mhz - get the channel width in Mhz > > + * @chan_width: the channel width from nl80211_chan_width > > + * Return: channel width in Mhz if the chan_width from > > nl80211_chan_width > > + * is valid. -1 otherwise. > > + */ > > +int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width); > > + > > It's up to mac80211 maintainers, but I would think that the changes to > change nl80211_chan_width_to_mhz from static to exported should be > separate from the patch to introduced WBRF support in the series. Yeah, that'd be nice :) > > +#define KHZ_TO_HZ(freq)((freq) * 1000ULL) Together with MHZ_TO_KHZ() for example :) johannes
[PATCH] drm/amd/pm: Provide energy data in 15.625mJ units
Publish energy data in 15.625mJ unit for SMU v13.0.6. The same unit is used in Aldebaran also. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index a92ea4601ea4..6ef12252beb5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -200,7 +200,6 @@ struct PPTable_t { }; #define SMUQ10_TO_UINT(x) ((x) >> 10) -#define SMUQ16_TO_UINT(x) ((x) >> 16) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -1994,8 +1993,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->average_socket_power = SMUQ10_TO_UINT(metrics->SocketPower); + /* Energy is reported in 15.625mJ units */ gpu_metrics->energy_accumulator = - SMUQ16_TO_UINT(metrics->SocketEnergyAcc); + SMUQ10_TO_UINT(metrics->SocketEnergyAcc); gpu_metrics->current_gfxclk = SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]); -- 2.25.1
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On Mon, 19 Jun 2023 12:44:06 +0200 Christian König wrote: > Am 19.06.23 um 12:12 schrieb Boris Brezillon: > > [SNIP] > > Note that the drm_exec_until_all_locked() helper I introduced is taking > > an expression, so in theory, you don't have to define a separate > > function. > > > > drm_exec_until_all_locked(, { > > /* inlined-code */ > > int ret; > > > > ret = blabla() > > if (ret) > > goto error; > > > > ... > > > > error: > > /* return value. */ > > ret; > > }); > > > > This being said, as soon as you have several failure paths, > > it makes things a lot easier/controllable if you make it a function, > > and I honestly don't think the readability would suffer from having a > > function defined just above the user. My main concern with the original > > approach was the risk of calling continue/break_if_contended() in the > > wrong place, and also the fact you can't really externalize things to > > a function if you're looking for a cleaner split. At least with > > drm_exec_until_all_locked() you can do both. > > Yeah, but that means that you can't use return inside your code block > and instead has to define an error label for handling "normal" > contention which is what I'm trying to avoid here. Sorry, didn't pay attention to this particular concern. Indeed, if you want to return inside the expression, that's a problem. > > How about: > > #define drm_exec_until_all_locked(exec) \ > __drm_exec_retry: if (drm_exec_cleanup(exec)) > > > #define drm_exec_retry_on_contention(exec) \ > if (unlikely(drm_exec_is_contended(exec))) \ > goto __drm_exec_retry > > > And then use it like: > > drm_exec_until_all_locked(exec) > { > ret = drm_exec_prepare_obj(exec, obj); > drm_exec_retry_on_contention(exec); > } > > The only problem I can see with this is that the __drm_exec_retry label > would be function local. Yeah, I'm not sure it's safe to use non-local labels for that, because, as soon as you have more than one drm_exec_until_all_locked() call in a given function it won't work, which is why I placed things in a block with local labels, which in turn means you can't return directly, unfortunately.
[PATCH] drm/amd/display: Remove else after return statement in 'dm_update_plane_state'
Else is not necessary after return statements, hence remove it. Reported by checkpatch: WARNING: else is not generally useful after a break or return drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:9776: return -EINVAL; else Cc: Bhawanpreet Lakha Cc: Qingqing Zhuo Cc: Nicholas Kazlauskas Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2446529c329a..e657214e0104 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9777,8 +9777,8 @@ static int dm_update_plane_state(struct dc *dc, if (plane->type == DRM_PLANE_TYPE_OVERLAY) { if (is_video_format(new_plane_state->fb->format->format) && *is_top_most_overlay) return -EINVAL; - else - *is_top_most_overlay = false; + + *is_top_most_overlay = false; } DRM_DEBUG_ATOMIC("Enabling DRM plane: %d on DRM crtc %d\n", -- 2.25.1
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On Mon, 19 Jun 2023 13:05:02 +0200 Boris Brezillon wrote: > On Mon, 19 Jun 2023 12:44:06 +0200 > Christian König wrote: > > > Am 19.06.23 um 12:12 schrieb Boris Brezillon: > > > [SNIP] > > > Note that the drm_exec_until_all_locked() helper I introduced is taking > > > an expression, so in theory, you don't have to define a separate > > > function. > > > > > > drm_exec_until_all_locked(, { > > > /* inlined-code */ > > > int ret; > > > > > > ret = blabla() > > > if (ret) > > > goto error; > > > > > > ... > > > > > > error: > > > /* return value. */ > > > ret; > > > }); > > > > > > This being said, as soon as you have several failure paths, > > > it makes things a lot easier/controllable if you make it a function, > > > and I honestly don't think the readability would suffer from having a > > > function defined just above the user. My main concern with the original > > > approach was the risk of calling continue/break_if_contended() in the > > > wrong place, and also the fact you can't really externalize things to > > > a function if you're looking for a cleaner split. At least with > > > drm_exec_until_all_locked() you can do both. > > > > Yeah, but that means that you can't use return inside your code block > > and instead has to define an error label for handling "normal" > > contention which is what I'm trying to avoid here. > > > > How about: > > > > #define drm_exec_until_all_locked(exec) \ > > __drm_exec_retry: if (drm_exec_cleanup(exec)) > > > > > > #define drm_exec_retry_on_contention(exec) \ > > if (unlikely(drm_exec_is_contended(exec))) \ > > goto __drm_exec_retry > > > > > > And then use it like: > > > > drm_exec_until_all_locked(exec) > > { > > ret = drm_exec_prepare_obj(exec, obj); > > drm_exec_retry_on_contention(exec); > > } > > That would work, and I was about to suggest extending my proposal with > a drm_exec_retry_on_contention() to support both use cases. The only > downside is the fact you might be able to break out of a loop that has > local variables, which will leak stack space. Nevermind, brain fart on my end. It shouldn't leak any stack space, so yeah, I think that's a good compromise.
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On 6/19/23 11:48, Christian König wrote: Hi, Am 19.06.23 um 11:33 schrieb Thomas Hellström (Intel): [SNIP] Sometimes you want to just drop the contended lock after the above relaxation. (Eviction would be one example), and not add as prelocked, if the contended object goes out of scope. Eviction would in some situations be one such example, -EDEADLOCK leading to an error path where the object should otherwise be freed is another. Perhaps we could add an argument to prepare_obj() as to whether the object should be immediately put after relaxation. I was considering a try_prepare version as well, that should cover this use case. That sounds a bit different from this use-case. The use-case above would, on -EDEADLOCK actually unlock everything, then lock-slow the contending lock and then immediately unlock it and drop. Hui? What would that be good for? It's for the case where you have nested locking, the contended lock has gone out-of-scope and you're probably not going to need it on the next attempt. I think the refcounted "prelocked" object that is lacking in the i915 variant will resolve all correctness / uaf issues, but still the object might be needlessly carried around for yet another locking round. It sounds like try_prepare would just skip locking and continue with everything locked so far still locked? Correct. + ret = drm_exec_obj_locked(exec, obj); + if (unlikely(ret)) { + dma_resv_unlock(obj->resv); + goto error_dropref; + } + + swap(exec->prelocked, obj); + +error_dropref: + /* Always cleanup the contention so that error handling can kick in */ + drm_gem_object_put(obj); + exec->contended = NULL; + return ret; +} + +/** + * drm_exec_prepare_obj - prepare a GEM object for use + * @exec: the drm_exec object with the state + * @obj: the GEM object to prepare + * @num_fences: how many fences to reserve + * + * Prepare a GEM object for use by locking it and reserving fence slots. All + * successfully locked objects are put into the locked container. + * + * Returns: -EDEADLK if a contention is detected, -EALREADY when object is + * already locked, -ENOMEM when memory allocation failed and zero for success. + */ +int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, + unsigned int num_fences) +{ + int ret; + + ret = drm_exec_lock_contended(exec); + if (unlikely(ret)) + return ret; + + if (exec->prelocked == obj) { + drm_gem_object_put(exec->prelocked); + exec->prelocked = NULL; + + return dma_resv_reserve_fences(obj->resv, num_fences); + } + + if (exec->flags & DRM_EXEC_FLAG_INTERRUPTIBLE) + ret = dma_resv_lock_interruptible(obj->resv, >ticket); + else + ret = dma_resv_lock(obj->resv, >ticket); + + if (unlikely(ret == -EDEADLK)) { + drm_gem_object_get(obj); + exec->contended = obj; + return -EDEADLK; + } + + if (unlikely(ret == -EALREADY && + (exec->flags & DRM_EXEC_FLAG_ALLOW_DUPLICATES))) + goto reserve_fences; + + if (unlikely(ret)) + return ret; + + ret = drm_exec_obj_locked(exec, obj); + if (ret) + goto error_unlock; + +reserve_fences: + /* Keep locked when reserving fences fails */ + return dma_resv_reserve_fences(obj->resv, num_fences); Ugh, what is the use-case for keeping things locked here? How would a caller tell the difference between an error where everything is locked and nothing is locked? IMO, we should unlock on error here. If there indeed is a use-case we should add a separate function for reserving fences for all locked objects, rather than returning sometimes locked on error sometime not. We return the object locked here because it was to much churn to remove it again from the array and we are getting fully cleaned up at the end anyway. OK, so if we add an unlock functionality, we could just have a consistent locking state on error return? Yeah, that should work. Going to work on this. Great. Thanks, Thomas Regards, Christian. Thanks, Thomas Regards, Christian. Thanks, Thomas + +error_unlock: + dma_resv_unlock(obj->resv); + return ret; +} +EXPORT_SYMBOL(drm_exec_prepare_obj); + +/** + * drm_exec_prepare_array - helper to prepare an array of objects + * @exec: the drm_exec object with the state + * @objects: array of GEM object to prepare + * @num_objects: number of GEM objects in the array + * @num_fences: number of fences to reserve on each GEM object + * + * Prepares all GEM objects in an array, handles contention but aports on first + * error otherwise. Reserves @num_fences on each GEM object after locking it. + * + * Returns: -EALREADY when object is already locked, -ENOMEM when memory + * allocation failed and zero for success. + */ +int drm_exec_prepare_array(struct drm_exec *exec, + struct drm_gem_object **objects, + unsigned int num_objects, +
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On Mon, 19 Jun 2023 12:44:06 +0200 Christian König wrote: > Am 19.06.23 um 12:12 schrieb Boris Brezillon: > > [SNIP] > > Note that the drm_exec_until_all_locked() helper I introduced is taking > > an expression, so in theory, you don't have to define a separate > > function. > > > > drm_exec_until_all_locked(, { > > /* inlined-code */ > > int ret; > > > > ret = blabla() > > if (ret) > > goto error; > > > > ... > > > > error: > > /* return value. */ > > ret; > > }); > > > > This being said, as soon as you have several failure paths, > > it makes things a lot easier/controllable if you make it a function, > > and I honestly don't think the readability would suffer from having a > > function defined just above the user. My main concern with the original > > approach was the risk of calling continue/break_if_contended() in the > > wrong place, and also the fact you can't really externalize things to > > a function if you're looking for a cleaner split. At least with > > drm_exec_until_all_locked() you can do both. > > Yeah, but that means that you can't use return inside your code block > and instead has to define an error label for handling "normal" > contention which is what I'm trying to avoid here. > > How about: > > #define drm_exec_until_all_locked(exec) \ > __drm_exec_retry: if (drm_exec_cleanup(exec)) > > > #define drm_exec_retry_on_contention(exec) \ > if (unlikely(drm_exec_is_contended(exec))) \ > goto __drm_exec_retry > > > And then use it like: > > drm_exec_until_all_locked(exec) > { > ret = drm_exec_prepare_obj(exec, obj); > drm_exec_retry_on_contention(exec); > } That would work, and I was about to suggest extending my proposal with a drm_exec_retry_on_contention() to support both use cases. The only downside is the fact you might be able to break out of a loop that has local variables, which will leak stack space. > > The only problem I can see with this is that the __drm_exec_retry label > would be function local. You can use local labels [1] to make it local to a block (see my version, just need to rename the retry label into __drm_exec_retry). I checked, and this is used elsewhere in the kernel (like in linux/wait.h, which is a core feature), so it should be safe to use. [1]https://gcc.gnu.org/onlinedocs/gcc/Local-Labels.html
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
Am 19.06.23 um 12:12 schrieb Boris Brezillon: [SNIP] Note that the drm_exec_until_all_locked() helper I introduced is taking an expression, so in theory, you don't have to define a separate function. drm_exec_until_all_locked(, { /* inlined-code */ int ret; ret = blabla() if (ret) goto error; ... error: /* return value. */ ret; }); This being said, as soon as you have several failure paths, it makes things a lot easier/controllable if you make it a function, and I honestly don't think the readability would suffer from having a function defined just above the user. My main concern with the original approach was the risk of calling continue/break_if_contended() in the wrong place, and also the fact you can't really externalize things to a function if you're looking for a cleaner split. At least with drm_exec_until_all_locked() you can do both. Yeah, but that means that you can't use return inside your code block and instead has to define an error label for handling "normal" contention which is what I'm trying to avoid here. How about: #define drm_exec_until_all_locked(exec) \ __drm_exec_retry: if (drm_exec_cleanup(exec)) #define drm_exec_retry_on_contention(exec) \ if (unlikely(drm_exec_is_contended(exec))) \ goto __drm_exec_retry And then use it like: drm_exec_until_all_locked(exec) { ret = drm_exec_prepare_obj(exec, obj); drm_exec_retry_on_contention(exec); } The only problem I can see with this is that the __drm_exec_retry label would be function local. Regards, Christian. Regards, Boris
RE: [PATCH Review 1/1] drm/amdgpu: Remove redundant poison consumption handler function
[AMD Official Use Only - General] Please ignore this patch, I will send V2. Regards, Stanley > -Original Message- > From: Stanley.Yang > Sent: Monday, June 19, 2023 4:24 PM > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking > ; Zhou1, Tao ; Chai, > Thomas > Cc: Yang, Stanley > Subject: [PATCH Review 1/1] drm/amdgpu: Remove redundant poison > consumption handler function > > The function callback handle_poison_consumption and callback function > poison_consumption_handler are almost same to handle poison > consumption, remove poison_consumption_handler. > > Signed-off-by: Stanley.Yang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 - > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 12 +--- > 5 files changed, 13 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a33d4bc34cee..c15dbdb2e0f9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -840,15 +840,6 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device > *adev) > return 0; > } > > -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, > - struct amdgpu_iv_entry > *entry) > -{ > - if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) > - return adev->gfx.ras->poison_consumption_handler(adev, > entry); > - > - return 0; > -} > - > int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, > void *err_data, > struct amdgpu_iv_entry *entry) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index d0c3f2955821..95b80bc8cdb9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -273,8 +273,6 @@ struct amdgpu_gfx_ras { > int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry); > - int (*poison_consumption_handler)(struct amdgpu_device *adev, > - struct amdgpu_iv_entry > *entry); > }; > > struct amdgpu_gfx_shadow_info { > @@ -538,8 +536,6 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device > *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, > uint32_t ucode_id); > > int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); -int > amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, > - struct amdgpu_iv_entry > *entry); > > bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); int > amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 5b6525d8dace..7be289473034 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1694,15 +1694,13 @@ static void > amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * > amdgpu_umc_poison_handler(adev, false); > > if (block_obj->hw_ops && block_obj->hw_ops- > >handle_poison_consumption) > - poison_stat = block_obj->hw_ops- > >handle_poison_consumption(adev); > + poison_stat = block_obj->hw_ops- > >handle_poison_consumption(adev, > +entry); > > /* gpu reset is fallback for failed and default cases */ > - if (poison_stat) { > + if (poison_stat != true) { > dev_info(adev->dev, "GPU reset for %s RAS poison > consumption is issued!\n", > block_obj->ras_comm.name); > amdgpu_ras_reset_gpu(adev); > - } else { > - amdgpu_gfx_poison_consumption_handler(adev, entry); > } > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 46bf1889a9d7..03f3b3774b85 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -564,7 +564,8 @@ struct amdgpu_ras_block_hw_ops { > void (*reset_ras_error_count)(struct amdgpu_device *adev); > void (*reset_ras_error_status)(struct amdgpu_device *adev); > bool (*query_poison_status)(struct amdgpu_device *adev); > - bool (*handle_poison_consumption)(struct amdgpu_device *adev); > + bool (*handle_poison_consumption)(struct amdgpu_device *adev, > + struct amdgpu_iv_entry *entry); > }; > > /* work flow > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > index 26d6286d86c9..5b7eac547a05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c >
[PATCH] drm/amdgpu/vcn: Need to pause dpg before stop dpg
Need to pause dpg first, or it will hit follow error during stop dpg: "[drm] Register(1) [regUVD_POWER_STATUS] failed to reach value 0x0001 != 0xn" Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index b48bb5212488..259795098173 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1424,8 +1424,10 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) */ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v4_0_pause_dpg_mode(adev, inst_idx, ); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); -- 2.36.1
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On Mon, 19 Jun 2023 11:20:06 +0200 Christian König wrote: > Hi guys, > > Am 19.06.23 um 10:59 schrieb Thomas Hellström (Intel): > > [SNIP] > > I really need to find some time to work on that anyway. > >> I've been playing with drm_exec for a couple weeks now, and I wanted > >> to share something I hacked to try and make the API simpler and > >> more robust against misuse (see the below diff, which is a slightly > >> adjusted version of your work). > > > > It would be good if we could have someone taking charge of this series > > and address all review comments, I see some of my comments getting > > lost, we have multiple submitters and I can't find a dri-devel > > patchwork entry for this. Anyway some comments below. > > I can try to find some time for the series this week (As long as nobody > comes along and has any burning roof). That's great news! > > > > >> > >> In this version, the user is no longer in control of the retry > >> loop. Instead, it provides an expression (a call to a > >> sub-function) to be re-evaluated each time a contention is > >> detected. IMHO, this makes the 'prepare-objs' functions easier to > >> apprehend, and avoids any mistake like calling > >> drm_exec_continue_on_contention() in an inner loop, or breaking > >> out of the drm_exec_while_all_locked() loop unintentionally. > > > > In i915 we've had a very similar helper to this, and while I agree > > this newer version would probably help make code cleaner, but OTOH > > there also are some places where the short drm_exec_while_all_locked() > > -likeblock don't really motivate a separate function. Porting i915 to > > the current version will take some work, For the xe driver both > > versions would work fine. > > Yeah, this is actually what my first version of this looked like. But I > abandoned that approach because we have a lot of cases were we just > quickly want to lock a few GEM objects and don't want the extra overhead > of putting all the state into some bag to forward it to a function. If you're talking about verbosity, it might be the case, though I guess it mostly a matter of taste (I do like when things are well isolated). As for runtime overhead, I'd expect the compiler to inline the function anyway, so it's unlikely to change anything. > >> +/* Track the locked object in the array */ > >> +static int drm_exec_obj_locked(struct drm_exec *exec, > >> + struct drm_gem_object *obj) > >> +{ > >> + if (unlikely(exec->num_objects == exec->max_objects)) { > >> + size_t size = exec->max_objects * sizeof(void *); > >> + void *tmp; > >> + > >> + tmp = kvrealloc(exec->objects, size, size + PAGE_SIZE, > >> + GFP_KERNEL); > >> + if (!tmp) > >> + return -ENOMEM; > > > > Sometimes you need to just temporarily lock an object and then unlock > > it again if it goes out of scope before reaching the end of > > _until_all_locked(). In that case you might need to remove a lock from > > the array. I *think* for all use-cases in i915 it would suffice to > > take a snapshot of num_objects, and unlock everything above that, > > having exec->objects behave like a stack, but was ever a list > > considered instead of a realloced array? > > Yes, the problem is that linked lists really suck regarding their cache > line locality. That's why I've came up with this approach here. Hm, maybe I'm missing something, but if you place the list_head obj you use to stack the locked objects close enough to the resv pointer, and aligned on cache line, it shouldn't really be a problem, given you have to dereference the GEM object to retrieve its resv anyway.
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
Hello Thomas, On Mon, 19 Jun 2023 10:59:16 +0200 Thomas Hellström (Intel) wrote: > > > +/** > > + * DOC: Overview > > + * > > + * This component mainly abstracts the retry loop necessary for locking > > + * multiple GEM objects while preparing hardware operations (e.g. > > command > > + * submissions, page table updates etc..). > > + * > > + * If a contention is detected while locking a GEM object the cleanup > > procedure > > + * unlocks all previously locked GEM objects and locks the contended > > one first > > + * before locking any further objects. > > + * > > + * After an object is locked fences slots can optionally be reserved > > on the > > + * dma_resv object inside the GEM object. > > + * > > + * A typical usage pattern should look like this:: > > + * > > + * struct drm_gem_object *obj; > > + * struct drm_exec exec; > > + * unsigned long index; > > + * int ret; > > + * > > + * drm_exec_init(, true); > > + * drm_exec_while_not_all_locked() { > > + * ret = drm_exec_prepare_obj(, boA, 1); > > + * drm_exec_continue_on_contention(); > > + * if (ret) > > + * goto error; > > + * > Have you considered defining a drm_exec_try_prepare_obj_or_retry() > combining drm_exec_prepare_obj() and drm_exec_continue_on_contention()? > > #define drm_exec_try_prepare_obj_or_retry(exec, obj, num_fences) \ > ({ \ > int __ret = drm_exec_prepare_obj(exec, bo, > num_fences); \ > if (unlikely(drm_exec_is_contended(exec))) \ > continue; \ > __ret; \ > }) > > This way the following pattern > > ret = drm_exec_prepare_obj(, boA, 1); > drm_exec_continue_on_contention(); > if (ret) > goto error; > > can be turned into something more conventional: > > ret = drm_exec_try_prepare_obj_or_retry(, boA, 1); > if (ret) > goto error; > >>> Yeah, I was considering that as well. But then abandoned it as to > >>> complicated. > >>> > >>> I really need to find some time to work on that anyway. > > I've been playing with drm_exec for a couple weeks now, and I wanted > > to share something I hacked to try and make the API simpler and > > more robust against misuse (see the below diff, which is a slightly > > adjusted version of your work). > > It would be good if we could have someone taking charge of this series > and address all review comments, I see some of my comments getting lost, > we have multiple submitters and I can't find a dri-devel patchwork entry > for this. My bad, I wasn't intending to submit a new version. I just added a diff to show what I had in mind. This being said, it'd be great if we could make some progress on this series, because we have quite a few drivers depending on it now. > > > > > In this version, the user is no longer in control of the retry > > loop. Instead, it provides an expression (a call to a > > sub-function) to be re-evaluated each time a contention is > > detected. IMHO, this makes the 'prepare-objs' functions easier to > > apprehend, and avoids any mistake like calling > > drm_exec_continue_on_contention() in an inner loop, or breaking > > out of the drm_exec_while_all_locked() loop unintentionally. > > In i915 we've had a very similar helper to this, and while I agree this > newer version would probably help make code cleaner, but OTOH there also > are some places where the short drm_exec_while_all_locked() -likeblock > don't really motivate a separate function. Porting i915 to the current > version will take some work, For the xe driver both versions would work > fine. Note that the drm_exec_until_all_locked() helper I introduced is taking an expression, so in theory, you don't have to define a separate function. drm_exec_until_all_locked(, { /* inlined-code */ int ret; ret = blabla() if (ret) goto error; ... error: /* return value. */ ret; }); This being said, as soon as you have several failure paths, it makes things a lot easier/controllable if you make it a function, and I honestly don't think the readability would suffer from having a function defined just above the user. My main concern with the original approach was the risk of calling continue/break_if_contended() in the wrong place, and also the fact you can't really externalize things to a function if you're looking for a cleaner split. At least with drm_exec_until_all_locked() you can do both. Regards, Boris
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
Hi, Am 19.06.23 um 11:33 schrieb Thomas Hellström (Intel): [SNIP] Sometimes you want to just drop the contended lock after the above relaxation. (Eviction would be one example), and not add as prelocked, if the contended object goes out of scope. Eviction would in some situations be one such example, -EDEADLOCK leading to an error path where the object should otherwise be freed is another. Perhaps we could add an argument to prepare_obj() as to whether the object should be immediately put after relaxation. I was considering a try_prepare version as well, that should cover this use case. That sounds a bit different from this use-case. The use-case above would, on -EDEADLOCK actually unlock everything, then lock-slow the contending lock and then immediately unlock it and drop. Hui? What would that be good for? It sounds like try_prepare would just skip locking and continue with everything locked so far still locked? Correct. + ret = drm_exec_obj_locked(exec, obj); + if (unlikely(ret)) { + dma_resv_unlock(obj->resv); + goto error_dropref; + } + + swap(exec->prelocked, obj); + +error_dropref: + /* Always cleanup the contention so that error handling can kick in */ + drm_gem_object_put(obj); + exec->contended = NULL; + return ret; +} + +/** + * drm_exec_prepare_obj - prepare a GEM object for use + * @exec: the drm_exec object with the state + * @obj: the GEM object to prepare + * @num_fences: how many fences to reserve + * + * Prepare a GEM object for use by locking it and reserving fence slots. All + * successfully locked objects are put into the locked container. + * + * Returns: -EDEADLK if a contention is detected, -EALREADY when object is + * already locked, -ENOMEM when memory allocation failed and zero for success. + */ +int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, + unsigned int num_fences) +{ + int ret; + + ret = drm_exec_lock_contended(exec); + if (unlikely(ret)) + return ret; + + if (exec->prelocked == obj) { + drm_gem_object_put(exec->prelocked); + exec->prelocked = NULL; + + return dma_resv_reserve_fences(obj->resv, num_fences); + } + + if (exec->flags & DRM_EXEC_FLAG_INTERRUPTIBLE) + ret = dma_resv_lock_interruptible(obj->resv, >ticket); + else + ret = dma_resv_lock(obj->resv, >ticket); + + if (unlikely(ret == -EDEADLK)) { + drm_gem_object_get(obj); + exec->contended = obj; + return -EDEADLK; + } + + if (unlikely(ret == -EALREADY && + (exec->flags & DRM_EXEC_FLAG_ALLOW_DUPLICATES))) + goto reserve_fences; + + if (unlikely(ret)) + return ret; + + ret = drm_exec_obj_locked(exec, obj); + if (ret) + goto error_unlock; + +reserve_fences: + /* Keep locked when reserving fences fails */ + return dma_resv_reserve_fences(obj->resv, num_fences); Ugh, what is the use-case for keeping things locked here? How would a caller tell the difference between an error where everything is locked and nothing is locked? IMO, we should unlock on error here. If there indeed is a use-case we should add a separate function for reserving fences for all locked objects, rather than returning sometimes locked on error sometime not. We return the object locked here because it was to much churn to remove it again from the array and we are getting fully cleaned up at the end anyway. OK, so if we add an unlock functionality, we could just have a consistent locking state on error return? Yeah, that should work. Going to work on this. Regards, Christian. Thanks, Thomas Regards, Christian. Thanks, Thomas + +error_unlock: + dma_resv_unlock(obj->resv); + return ret; +} +EXPORT_SYMBOL(drm_exec_prepare_obj); + +/** + * drm_exec_prepare_array - helper to prepare an array of objects + * @exec: the drm_exec object with the state + * @objects: array of GEM object to prepare + * @num_objects: number of GEM objects in the array + * @num_fences: number of fences to reserve on each GEM object + * + * Prepares all GEM objects in an array, handles contention but aports on first + * error otherwise. Reserves @num_fences on each GEM object after locking it. + * + * Returns: -EALREADY when object is already locked, -ENOMEM when memory + * allocation failed and zero for success. + */ +int drm_exec_prepare_array(struct drm_exec *exec, + struct drm_gem_object **objects, + unsigned int num_objects, + unsigned int num_fences) +{ + int ret; + + for (unsigned int i = 0; i < num_objects; ++i) { + ret = drm_exec_prepare_obj(exec, objects[i], num_fences); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL(drm_exec_prepare_array); + +MODULE_DESCRIPTION("DRM execution context"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h new file mode
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
Hi! On 6/19/23 11:20, Christian König wrote: Hi guys, Am 19.06.23 um 10:59 schrieb Thomas Hellström (Intel): [SNIP] I really need to find some time to work on that anyway. I've been playing with drm_exec for a couple weeks now, and I wanted to share something I hacked to try and make the API simpler and more robust against misuse (see the below diff, which is a slightly adjusted version of your work). It would be good if we could have someone taking charge of this series and address all review comments, I see some of my comments getting lost, we have multiple submitters and I can't find a dri-devel patchwork entry for this. Anyway some comments below. I can try to find some time for the series this week (As long as nobody comes along and has any burning roof). In this version, the user is no longer in control of the retry loop. Instead, it provides an expression (a call to a sub-function) to be re-evaluated each time a contention is detected. IMHO, this makes the 'prepare-objs' functions easier to apprehend, and avoids any mistake like calling drm_exec_continue_on_contention() in an inner loop, or breaking out of the drm_exec_while_all_locked() loop unintentionally. In i915 we've had a very similar helper to this, and while I agree this newer version would probably help make code cleaner, but OTOH there also are some places where the short drm_exec_while_all_locked() -likeblock don't really motivate a separate function. Porting i915 to the current version will take some work, For the xe driver both versions would work fine. Yeah, this is actually what my first version of this looked like. But I abandoned that approach because we have a lot of cases were we just quickly want to lock a few GEM objects and don't want the extra overhead of putting all the state into some bag to forward it to a function. Some additional review comments not related to the interface change below: It also makes the internal management a bit simpler, since we no longer call drm_exec_cleanup() on the first attempt, and can thus get rid of the DRM_EXEC_DUMMY trick. In the below diff, I also re-introduced native support for duplicates as an opt-in, so we don't have to do things like: ret = drm_exec_prepare_obj(exec, obj, num_fences); if (ret == -EALREADY) ret = dma_resv_reserve_fences(obj->resv, num_fences); if (ret) return ret; and can just do: ret = drm_exec_prepare_obj(exec, obj, num_fences); if (ret) return; Of course drivers can open-code a wrapper doing the same thing, but given at least pvr and radeon need this, it'd be nice if the core could support it natively. That's mostly it. Just wanted to share what I had in case you're interested. If not, that's fine too. Regards, Boris --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig | 6 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/drm_exec.c | 274 +++ include/drm/drm_exec.h | 130 + 5 files changed, 424 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index fe40ee686f6e..c9f120cfe730 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -524,6 +524,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 76991720637c..01a38fcdb1c4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -194,6 +194,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. +config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 1873f64db171..18a02eaf2d49 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -79,6 +79,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..e0ad1a3e1610 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
Hi guys, Am 19.06.23 um 10:59 schrieb Thomas Hellström (Intel): [SNIP] I really need to find some time to work on that anyway. I've been playing with drm_exec for a couple weeks now, and I wanted to share something I hacked to try and make the API simpler and more robust against misuse (see the below diff, which is a slightly adjusted version of your work). It would be good if we could have someone taking charge of this series and address all review comments, I see some of my comments getting lost, we have multiple submitters and I can't find a dri-devel patchwork entry for this. Anyway some comments below. I can try to find some time for the series this week (As long as nobody comes along and has any burning roof). In this version, the user is no longer in control of the retry loop. Instead, it provides an expression (a call to a sub-function) to be re-evaluated each time a contention is detected. IMHO, this makes the 'prepare-objs' functions easier to apprehend, and avoids any mistake like calling drm_exec_continue_on_contention() in an inner loop, or breaking out of the drm_exec_while_all_locked() loop unintentionally. In i915 we've had a very similar helper to this, and while I agree this newer version would probably help make code cleaner, but OTOH there also are some places where the short drm_exec_while_all_locked() -likeblock don't really motivate a separate function. Porting i915 to the current version will take some work, For the xe driver both versions would work fine. Yeah, this is actually what my first version of this looked like. But I abandoned that approach because we have a lot of cases were we just quickly want to lock a few GEM objects and don't want the extra overhead of putting all the state into some bag to forward it to a function. Some additional review comments not related to the interface change below: It also makes the internal management a bit simpler, since we no longer call drm_exec_cleanup() on the first attempt, and can thus get rid of the DRM_EXEC_DUMMY trick. In the below diff, I also re-introduced native support for duplicates as an opt-in, so we don't have to do things like: ret = drm_exec_prepare_obj(exec, obj, num_fences); if (ret == -EALREADY) ret = dma_resv_reserve_fences(obj->resv, num_fences); if (ret) return ret; and can just do: ret = drm_exec_prepare_obj(exec, obj, num_fences); if (ret) return; Of course drivers can open-code a wrapper doing the same thing, but given at least pvr and radeon need this, it'd be nice if the core could support it natively. That's mostly it. Just wanted to share what I had in case you're interested. If not, that's fine too. Regards, Boris --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig | 6 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/drm_exec.c | 274 +++ include/drm/drm_exec.h | 130 + 5 files changed, 424 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index fe40ee686f6e..c9f120cfe730 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -524,6 +524,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 76991720637c..01a38fcdb1c4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -194,6 +194,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. +config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 1873f64db171..18a02eaf2d49 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -79,6 +79,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..e0ad1a3e1610 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions,
Re: [PATCH 01/13] drm: execution context for GEM buffers v4
On 6/17/23 13:54, Boris Brezillon wrote: +Matthew who's been using drm_exec in Xe if I'm correct. Hello Christian, On Wed, 14 Jun 2023 15:02:52 +0200 Boris Brezillon wrote: On Wed, 14 Jun 2023 14:30:53 +0200 Christian König wrote: Am 14.06.23 um 14:23 schrieb Boris Brezillon: Hi Christian, On Thu, 4 May 2023 13:51:47 +0200 "Christian König" wrote: This adds the infrastructure for an execution context for GEM buffers which is similar to the existing TTMs execbuf util and intended to replace it in the long term. The basic functionality is that we abstracts the necessary loop to lock many different GEM buffers with automated deadlock and duplicate handling. As many other drivers do already, we are considering using drm_exec() for our resv locking in the PowerVR driver, so we might have more questions/comments in the coming days/weeks, but I already have a couple right now (see below). v3: drop duplicate tracking, radeon is really the only one needing that I think we'd actually be interested in duplicate tracking. Is there any way we can make it an optional feature through some extra helpers/flags? Doesn't have to be done in this patch series, I'm just wondering if this is something we can share as well. You can still capture the -EALREADY error and act appropriately in your driver. For radeon it just means ignoring the error code and going ahead, but that behavior doesn't seem to be desired in most cases. Initially I though we need to separately track how many and how often BOs are duplicated, but there is simply no use for this. [...] +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. + * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(, true); + * drm_exec_while_not_all_locked() { + * ret = drm_exec_prepare_obj(, boA, 1); + * drm_exec_continue_on_contention(); + * if (ret) + * goto error; + * Have you considered defining a drm_exec_try_prepare_obj_or_retry() combining drm_exec_prepare_obj() and drm_exec_continue_on_contention()? #define drm_exec_try_prepare_obj_or_retry(exec, obj, num_fences) \ ({ \ int __ret = drm_exec_prepare_obj(exec, bo, num_fences); \ if (unlikely(drm_exec_is_contended(exec))) \ continue; \ __ret; \ }) This way the following pattern ret = drm_exec_prepare_obj(, boA, 1); drm_exec_continue_on_contention(); if (ret) goto error; can be turned into something more conventional: ret = drm_exec_try_prepare_obj_or_retry(, boA, 1); if (ret) goto error; Yeah, I was considering that as well. But then abandoned it as to complicated. I really need to find some time to work on that anyway. I've been playing with drm_exec for a couple weeks now, and I wanted to share something I hacked to try and make the API simpler and more robust against misuse (see the below diff, which is a slightly adjusted version of your work). It would be good if we could have someone taking charge of this series and address all review comments, I see some of my comments getting lost, we have multiple submitters and I can't find a dri-devel patchwork entry for this. Anyway some comments below. In this version, the user is no longer in control of the retry loop. Instead, it provides an expression (a call to a sub-function) to be re-evaluated each time a contention is detected. IMHO, this makes the 'prepare-objs' functions easier to apprehend, and avoids any mistake like calling drm_exec_continue_on_contention() in an inner loop, or breaking out of the drm_exec_while_all_locked() loop unintentionally. In i915 we've had a very similar helper to this, and while I agree this newer version would probably help make code cleaner, but OTOH there also are some places where the short drm_exec_while_all_locked() -likeblock don't really motivate a separate function. Porting i915 to the current version will take some work, For the xe driver both versions would work fine. Some additional review comments not related to the interface change below: It also makes the internal management a bit simpler, since we no longer call drm_exec_cleanup() on the first
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
On Tue, 13 Jun 2023, Sui Jingfeng wrote: > Deal only with the VGA devcie(pdev->class == 0x0300), so replace the Typo here: s/devcie/device/. > pci_get_subsys() function with pci_get_class(). Filter the non-PCI display > device(pdev->class != 0x0300) out. There no need to process the non-display > PCI device. I've only come across this patch series now. Without diving into what this code actually does I have just one question as a matter of interest. > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c > index c1bc6c983932..22a505e877dc 100644 > --- a/drivers/pci/vgaarb.c > +++ b/drivers/pci/vgaarb.c > @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, > unsigned long action, > struct pci_dev *pdev = to_pci_dev(dev); > bool notify = false; > > - vgaarb_dbg(dev, "%s\n", __func__); > + /* Only deal with VGA class devices */ > + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) > + return 0; Hmm, shouldn't this also handle PCI_CLASS_NOT_DEFINED_VGA? As far as I know it is the equivalent of PCI_CLASS_DISPLAY_VGA for PCI <= 2.0 devices that were implemented before the idea of PCI device classes has developed into its current form. I may have such a VGA device somewhere. Maciej
[PATCH Review 1/1] drm/amdgpu: Remove redundant poison consumption handler function
The function callback handle_poison_consumption and callback function poison_consumption_handler are almost same to handle poison consumption, remove poison_consumption_handler. Signed-off-by: Stanley.Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 12 +--- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a33d4bc34cee..c15dbdb2e0f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -840,15 +840,6 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev) return 0; } -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) -{ - if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) - return adev->gfx.ras->poison_consumption_handler(adev, entry); - - return 0; -} - int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index d0c3f2955821..95b80bc8cdb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -273,8 +273,6 @@ struct amdgpu_gfx_ras { int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); - int (*poison_consumption_handler)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); }; struct amdgpu_gfx_shadow_info { @@ -538,8 +536,6 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); -int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5b6525d8dace..7be289473034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1694,15 +1694,13 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) - poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); + poison_stat = block_obj->hw_ops->handle_poison_consumption(adev, entry); /* gpu reset is fallback for failed and default cases */ - if (poison_stat) { + if (poison_stat != true) { dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n", block_obj->ras_comm.name); amdgpu_ras_reset_gpu(adev); - } else { - amdgpu_gfx_poison_consumption_handler(adev, entry); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 46bf1889a9d7..03f3b3774b85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -564,7 +564,8 @@ struct amdgpu_ras_block_hw_ops { void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_status)(struct amdgpu_device *adev); bool (*query_poison_status)(struct amdgpu_device *adev); - bool (*handle_poison_consumption)(struct amdgpu_device *adev); + bool (*handle_poison_consumption)(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry); }; /* work flow diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 26d6286d86c9..5b7eac547a05 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -78,7 +78,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, +static bool gfx_v11_0_3_handle_poison_consumption(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { /* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */ @@ -99,10 +99,16 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device
[PATCH v2] drm/amdgpu: Modify for_each_inst macro
Modify it such that it doesn't change the instance mask parameter. Signed-off-by: Lijo Lazar --- v2: Take care of bit-shift beyond width (Victor) drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4029c13a9be..aa42347bd67d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1295,9 +1295,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask) \ - for (i = ffs(inst_mask) - 1; inst_mask;\ -inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i)) +#define for_each_inst(i, inst_mask)\ + for (i = ffs(inst_mask); i-- != 0; \ +i = ffs(inst_mask & BIT_MASK_UPPER(i + 1))) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) -- 2.25.1