Re: [PATCHv2] drm/amdkfd: Enable GWS on GFX9.4.3
On 2023-06-16 14:44, Mukul Joshi wrote: Enable GWS capable queue creation for forward progress guarantee on GFX 9.4.3. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling --- v1->v2: - Update the condition for setting pqn->q->gws for GFX 9.4.3. drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + .../amd/amdkfd/kfd_process_queue_manager.c| 35 --- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9d4abfd8b55e..226d2dd7fa49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9ad1a2186a24..ba9d69054119 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,16 +123,24 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (gws) - ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, - gws, ); - else - ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, - pqn->q->gws); - if (unlikely(ret)) - return ret; + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, ); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + pqn->q->gws = mem; + } else { + /* +* Intentionally set GWS to a non-NULL 
value +* for GFX 9.4.3. +*/ + pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; + } - pqn->q->gws = mem; pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, @@ -164,7 +172,8 @@ void pqm_uninit(struct process_queue_manager *pqm) struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, >queues, process_queue_list) { - if (pqn->q && pqn->q->gws) + if (pqn->q && pqn->q->gws && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -446,8 +455,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, + pqn->q->gws); pdd->qpd.num_gws = 0; }
[PATCH 2/2] drm/amdgpu: enable mcbp by default on gfx9
It's required for high priority queues. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2535 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 78c6265fe79b..3eb370b77ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3677,6 +3677,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) if (amdgpu_mcbp == 1) adev->gfx.mcbp = true; + if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && + adev->gfx.num_gfx_rings) + adev->gfx.mcbp = true; + if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 03874371af60..308149dd7d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -180,7 +180,7 @@ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; -int amdgpu_mcbp; +int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; int amdgpu_mes_kiq; @@ -635,10 +635,10 @@ module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444); /** * DOC: mcbp (int) - * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled) + * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 auto (default)) */ MODULE_PARM_DESC(mcbp, - "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)"); + "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled), -1 = auto (default)"); module_param_named(mcbp, amdgpu_mcbp, int, 0444); /** -- 2.40.1
[PATCH 1/2] drm/amdgpu: make mcbp a per device setting
So we can selectively enable it on certain devices. No intended functional change. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 7 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f39db4a2c2cf..78c6265fe79b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2551,7 +2551,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { r = amdgpu_allocate_static_csa(adev, >virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -3672,6 +3672,18 @@ static const struct attribute *amdgpu_dev_attributes[] = { NULL }; +static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) +{ + if (amdgpu_mcbp == 1) + adev->gfx.mcbp = true; + + if (amdgpu_sriov_vf(adev)) + adev->gfx.mcbp = true; + + if (adev->gfx.mcbp) + DRM_INFO("MCBP is enabled\n"); +} + /** * amdgpu_device_init - initialize the driver * @@ -3823,9 +3835,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - if (amdgpu_mcbp) - DRM_INFO("MCBP is enabled\n"); - /* * Reset domain needs to be present early, before XGMI hive discovered * (if any) and intitialized to use reset sem and in_gpu reset flag @@ -3851,6 +3860,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + amdgpu_device_set_mcbp(adev); + /* Get rid of things like offb */ r = 
drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, _kms_driver); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ce0f7a8ad4b8..a4ff515ce896 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -434,6 +434,7 @@ struct amdgpu_gfx { uint16_txcc_mask; uint32_tnum_xcc_per_xcp; struct mutexpartition_mutex; + boolmcbp; /* mid command buffer preemption */ }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e3531aa3c8bd..cca5a495611f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -805,7 +805,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_mcbp) + if (adev->gfx.mcbp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1247,7 +1247,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, >vm, adev->virt.csa_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 78ec3420ef85..dacf281d2b21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -72,7 +72,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, int r; /* don't enable OS preemption on SDMA under SRIOV */ - if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp) + if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; if (ring->is_mes_queue) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 25b4d7f0bd35..41aa853a07d2 100644 ---
[PATCHv2] drm/amdkfd: Enable GWS on GFX9.4.3
Enable GWS capable queue creation for forward progress guarantee on GFX 9.4.3. Signed-off-by: Mukul Joshi --- v1->v2: - Update the condition for setting pqn->q->gws for GFX 9.4.3. drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + .../amd/amdkfd/kfd_process_queue_manager.c| 35 --- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9d4abfd8b55e..226d2dd7fa49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9ad1a2186a24..ba9d69054119 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,16 +123,24 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (gws) - ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, - gws, ); - else - ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, - pqn->q->gws); - if (unlikely(ret)) - return ret; + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, ); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + pqn->q->gws = mem; + } else { + /* +* Intentionally set GWS to a non-NULL value +* for GFX 9.4.3. +*/ + pqn->q->gws = gws ? 
ERR_PTR(-ENOMEM) : NULL; + } - pqn->q->gws = mem; pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, @@ -164,7 +172,8 @@ void pqm_uninit(struct process_queue_manager *pqm) struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, >queues, process_queue_list) { - if (pqn->q && pqn->q->gws) + if (pqn->q && pqn->q->gws && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -446,8 +455,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, + pqn->q->gws); pdd->qpd.num_gws = 0; } -- 2.35.1
Re: [PATCH] drm/amdkfd: Use KIQ to unmap HIQ
On 2023-06-16 14:00, Mukul Joshi wrote: Currently, we unmap HIQ by directly writing to HQD registers. This doesn't work for GFX9.4.3. Instead, use KIQ to unmap HIQ, similar to how we use KIQ to map HIQ. Using KIQ to unmap HIQ works for all GFX series post GFXv9. Signed-off-by: Mukul Joshi --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 47 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c| 47 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 48 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 4 ++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++ 13 files changed, 170 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 5b4b7f8b92a5..b82435e17ed0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -372,6 +372,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump, .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v9_hiq_hqd_destroy, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy, .wave_control_execute = kgd_gfx_v9_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 8ad7a7779e14..a919fb8e09a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -510,6 +510,52 @@ 
static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) return false; } +int kgd_gfx_v10_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + struct amdgpu_ring *kiq_ring = >gfx.kiq[0].ring; + struct v10_compute_mqd *m = get_mqd(mqd); + uint32_t mec, pipe; + uint32_t doorbell_off; + int r; + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + acquire_queue(adev, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + spin_lock(>gfx.kiq[0].ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 6); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(0) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); This looks like you're duplicating the functionality in kiq->pmf->kiq_unmap_queues. Can we just call that instead? See amdgpu_gfx_disable_kcq for example. 
Regards, Felix + + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(>gfx.kiq[0].ring_lock); + release_queue(adev); + + return r; +} + static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, @@ -1034,6 +1080,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v10_hiq_hqd_destroy, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, .wave_control_execute = kgd_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index e6b70196071a..00b4514ebdd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++
Re: [PATCH] drm/amdkfd: Enable GWS on GFX9.4.3
On 2023-06-16 13:59, Mukul Joshi wrote: Enable GWS capable queue creation for forward progress guarantee on GFX 9.4.3. Signed-off-by: Mukul Joshi --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + .../amd/amdkfd/kfd_process_queue_manager.c| 31 --- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9d4abfd8b55e..226d2dd7fa49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9ad1a2186a24..9a091d8f9aaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,16 +123,20 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (gws) - ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, - gws, ); - else - ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, - pqn->q->gws); - if (unlikely(ret)) - return ret; + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, ); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + pqn->q->gws = mem; + } else { + pqn->q->gws = ERR_PTR(-ENOMEM); I think this needs to be pqn->q->gws = gws ? 
ERR_PTR(-ENOMEM) : NULL; Regards, Felix + } - pqn->q->gws = mem; pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, @@ -164,7 +168,8 @@ void pqm_uninit(struct process_queue_manager *pqm) struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, >queues, process_queue_list) { - if (pqn->q && pqn->q->gws) + if (pqn->q && pqn->q->gws && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -446,8 +451,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, + pqn->q->gws); pdd->qpd.num_gws = 0; }
[PATCH] drm/amdkfd: Use KIQ to unmap HIQ
Currently, we unmap HIQ by directly writing to HQD registers. This doesn't work for GFX9.4.3. Instead, use KIQ to unmap HIQ, similar to how we use KIQ to map HIQ. Using KIQ to unmap HIQ works for all GFX series post GFXv9. Signed-off-by: Mukul Joshi --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 47 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c| 47 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 48 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 4 ++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++ 13 files changed, 170 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 5b4b7f8b92a5..b82435e17ed0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -372,6 +372,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump, .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v9_hiq_hqd_destroy, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy, .wave_control_execute = kgd_gfx_v9_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 8ad7a7779e14..a919fb8e09a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -510,6 +510,52 @@ static bool 
kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) return false; } +int kgd_gfx_v10_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + struct amdgpu_ring *kiq_ring = >gfx.kiq[0].ring; + struct v10_compute_mqd *m = get_mqd(mqd); + uint32_t mec, pipe; + uint32_t doorbell_off; + int r; + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + acquire_queue(adev, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + spin_lock(>gfx.kiq[0].ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 6); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(0) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(>gfx.kiq[0].ring_lock); + release_queue(adev); + + return r; +} + static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, @@ -1034,6 +1080,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hiq_hqd_destroy = kgd_gfx_v10_hiq_hqd_destroy, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, .wave_control_execute = kgd_wave_control_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index e6b70196071a..00b4514ebdd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -53,3 +53,6 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period,
[PATCH] drm/amdkfd: Enable GWS on GFX9.4.3
Enable GWS capable queue creation for forward progress guarantee on GFX 9.4.3. Signed-off-by: Mukul Joshi --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + .../amd/amdkfd/kfd_process_queue_manager.c| 31 --- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9d4abfd8b55e..226d2dd7fa49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9ad1a2186a24..9a091d8f9aaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,16 +123,20 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (gws) - ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, - gws, ); - else - ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, - pqn->q->gws); - if (unlikely(ret)) - return ret; + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, ); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + pqn->q->gws = mem; + } else { + pqn->q->gws = ERR_PTR(-ENOMEM); + } - pqn->q->gws = mem; pdd->qpd.num_gws = gws ? 
dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, @@ -164,7 +168,8 @@ void pqm_uninit(struct process_queue_manager *pqm) struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, >queues, process_queue_list) { - if (pqn->q && pqn->q->gws) + if (pqn->q && pqn->q->gws && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -446,8 +451,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, + pqn->q->gws); pdd->qpd.num_gws = 0; } -- 2.35.1
[pull] amdgpu, amdkfd, radeon, drm, scheduler, UAPI drm-next-6.5
Hi Dave, Daniel, Last few odds and ends for 6.5. Mostly bug fixes. Was waiting on some GPU scheduler changes in drm-misc for a few of the GPU reset fixes in amdgpu. The following changes since commit 901bdf5ea1a836400ee69aa32b04e9c209271ec7: Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next (2023-06-15 14:11:22 +1000) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-next-6.5-2023-06-16 for you to fetch changes up to 72f1de49ffb90b29748284f27f1d6b829ab1de95: drm/dp_mst: Clear MSG_RDY flag before sending new message (2023-06-15 17:55:41 -0400) amd-drm-next-6.5-2023-06-16: amdgpu: - Misc display fixes - W=1 fixes - Improve scheduler naming - DCN 3.1.4 fixes - kdoc fixes - Enable W=1 - VCN 4.0 fix - xgmi fixes - TOPDOWN fix for large BAR systems - eDP fix - PSR fixes - SubVP fixes - Freesync fix - DPIA fix - SMU 13.0.5 fixes - vblflash fix - RAS fixes - SDMA 4 fix - BO locking fix - BO backing store fix - NBIO 7.9 fixes - GC 9.4.3 fixes - GPU reset recovery fixes - HMM fix amdkfd: - Fix NULL check - Trap fixes - Queue count fix - Add event age tracking radeon: - fbdev client fix scheduler: - Avoid an infinite loop UAPI: - Add KFD event age tracking: Proposed ROCT-Thunk-Interface: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/efdbf6cfbc026bd68ac3c35d00dacf84370eb81e https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/1820ae0a2db85b6f584611dc0cde1a00e7c22915 Proposed ROCR-Runtime: https://github.com/RadeonOpenCompute/ROCR-Runtime/compare/master...zhums:ROCR-Runtime:new_event_wait_review https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/e1f5bdb88eb882ac798aeca2c00ea3fbb2dba459 https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/7d26afd14107b5c2a754c1a3f415d89f3aabb503 drm: - DP MST fix Alex Deucher (4): Revert "drm/amd/display: fix dpms_off issue when disabling bios mode" drm/amd/display: don't free stolen console memory during 
suspend drm/amdgpu/sdma4: set align mask to 255 drm/amdgpu: mark GC 9.4.3 experimental for now Alvin Lee (4): drm/amd/display: SubVP high refresh only if all displays >= 120hz drm/amd/display: Re-enable SubVP high refresh drm/amd/display: Block SubVP + DRR if the DRR is PSR capable drm/amd/display: Include CSC updates in new fast update path Aric Cyr (2): drm/amd/display: Promote DAL to 3.2.238 drm/amd/display: 3.2.239 Artem Grishin (1): drm/amd/display: Bug fix in dcn315_populate_dml_pipes_from_context Arunpravin Paneer Selvam (1): Revert "drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system" Austin Zheng (2): drm/amd/display: Add DP2 Metrics drm/amd/display: Limit Minimum FreeSync Refresh Rate Candice Li (3): drm/amd/pm: Align eccinfo table structure with smu v13_0_0 interface drm/amdgpu: Update total channel number for umc v8_10 drm/amdgpu: Add channel_dis_num to ras init flags Christian König (8): drm/amdgpu: make sure BOs are locked in amdgpu_vm_get_memory drm/amdgpu: make sure that BOs have a backing store drm/amdgpu: add amdgpu_error_* debugfs file drm/amdgpu: mark force completed fences with -ECANCELED drm/amdgpu: mark soft recovered fences with -ENODATA drm/amdgpu: abort submissions during prepare on error drm/amdgpu: reset VM when an error is detected drm/amdgpu: add VM generation token Daniel Miess (2): drm/amd/display: Enable dcn314 DPP RCO drm/amd/display: Re-enable DPP/HUBP Power Gating Dmytro Laktyushkin (1): drm/amd/display: fix pixel rate update sequence Fangzhi Zuo (1): drm/amd/display: Add Error Code for Dml Validation Failure Hamza Mahfooz (1): drm/amd/amdgpu: enable W=1 for amdgpu Hersen Wu (2): drm/amd/display: edp do not add non-edid timings drm/amd/display: add debugfs for allow_edp_hotplug_detection James Zhu (5): drm/amdkfd: add event age tracking drm/amdkfd: add event_age tracking when receiving interrupt drm/amdkfd: set activated flag true when event age unmatchs drm/amdkfd: update user space last_event_age 
drm/amdkfd: bump kfd ioctl minor version for event age availability Jonathan Kim (2): drm/amdkfd: fix null queue check on debug setting exceptions drm/amdkfd: decrement queue count on mes queue destroy Lee Jones (1): drm/amd/display/amdgpu_dm/amdgpu_dm_helpers: Move SYNAPTICS_DEVICE_ID into CONFIG_DRM_AMD_DC_DCN ifdef Lijo Lazar (4): drm/amdgpu: Release SDMAv4.4.2 ecc irq properly drm/amdgpu: Change nbio v7.9 xcp status definition drm/amdgpu: Use PSP FW API for partition switch drm/amdgpu: Remove unused NBIO interface Likun Gao (1):
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
Hi, On 2023/6/16 22:34, Alex Deucher wrote: On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng wrote: On 2023/6/16 21:41, Alex Deucher wrote: On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng wrote: Hi, On 2023/6/16 05:11, Alex Deucher wrote: On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng wrote: Hi, On 2023/6/13 11:01, Sui Jingfeng wrote: From: Sui Jingfeng Deal only with the VGA device (pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non-PCI display device (pdev->class != 0x0300) out. There is no need to process the non-display PCI device. Cc: Bjorn Helgaas Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index c1bc6c983932..22a505e877dc 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - Hi, this is probably a bug fix. For example, an NVIDIA render-only GPU typically has 0x0380 as its PCI class number, but a render-only GPU should not participate in the arbitration, as it shouldn't snoop the legacy fixed VGA address. It (the render-only GPU) cannot display anything. But 0x0380 >> 8 = 0x03, the filter failed. /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) + return 0; So here we only care about 0x0300. My initial intent is to make an optimization; nowadays sane display graphics cards should all have 0x0300 as their PCI class number. Is this completely right? 
``` #define PCI_BASE_CLASS_DISPLAY0x03 #define PCI_CLASS_DISPLAY_VGA0x0300 #define PCI_CLASS_DISPLAY_XGA0x0301 #define PCI_CLASS_DISPLAY_3D0x0302 #define PCI_CLASS_DISPLAY_OTHER0x0380 ``` Any ideas ? I'm not quite sure what you are asking about here. To be honest, I'm worried about the PCI devices which has a PCI_CLASS_DISPLAY_XGA as its PCI class number. As those devices are very uncommon in the real world. $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA" Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO, there no code reference this macro. So I think it seems safe to ignore the XGA ? PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate the render-only GPU. And render-only GPU can't decode the fixed VGA address space, it is safe to ignore them. For vga_arb, we only care about VGA class devices since those should be on the only ones that might have VGA routed to them. However, as VGA gets deprecated, We need the vgaarb for a system with multiple video card. Not only because some Legacy VGA devices implemented on PCI will typically have the same "hard-decoded" addresses; But also these video card need to participate in the arbitration, determine the default boot device. But couldn't the boot device be determined via what whatever resources were used by the pre-OS console? I don't know what you are refer to by saying pre-OS console, UEFI SHELL, UEFI GOP or something like that. Right. Before the OS loads the platform firmware generally sets up something for display. That could be GOP or vesa or some other platform specific protocol. If you are referring to the framebuffer driver which light up the screen before the Linux kernel is loaded . Then, what you have said is true, the boot device is determined by the pre-OS console. But the problem is how does the Linux kernel(vgaarb) could know which one is the default boot device on a multiple GPU machine. 
Relaying on the firmware fb's address and size is what the mechanism we already in using. Right. It shouldn't need to depend on vgaarb. I feel like that should be separate from vgaarb. Emm, this really deserved another patch, please ? vgaarb should handle PCI VGA routing and some other mechanism should be used to determine what device provided the pre-OS console. If the new mechanism need the firmware changed, then this probably break the old machine. Also, this probably will get all arch involved. to get the new mechanism supported. The testing pressure and review power needed is quite large. drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and more arch... The reviewing process will became quite difficult then. vgaarb is really what we already in use, and being
Re: [PATCH] drm/amdgpu: Modify for_each_inst macro
[AMD Official Use Only - General] cc: Victor Victor pointed at a shift beyond MSB condition. Will send a v2. Thanks, Lijo From: amd-gfx on behalf of Lijo Lazar Sent: Friday, June 16, 2023 3:53:40 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kamal, Asad ; Zhang, Hawking Subject: [PATCH] drm/amdgpu: Modify for_each_inst macro Modify it such that it doesn't change the instance mask parameter. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4029c13a9be..c5451a9b0ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1295,9 +1295,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask) \ - for (i = ffs(inst_mask) - 1; inst_mask;\ -inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define for_each_inst(i, inst_mask)\ + for (i = ffs(inst_mask); i-- != 0; \ +i = ffs((inst_mask & (~0U << (i + 1))))) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) -- 2.25.1
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng wrote: > > > On 2023/6/16 21:41, Alex Deucher wrote: > > On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng > > wrote: > >> Hi, > >> > >> On 2023/6/16 05:11, Alex Deucher wrote: > >>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng > >>> wrote: > Hi, > > On 2023/6/13 11:01, Sui Jingfeng wrote: > > From: Sui Jingfeng > > > > Deal only with the VGA devcie(pdev->class == 0x0300), so replace the > > pci_get_subsys() function with pci_get_class(). Filter the non-PCI > > display > > device(pdev->class != 0x0300) out. There no need to process the > > non-display > > PCI device. > > > > Cc: Bjorn Helgaas > > Signed-off-by: Sui Jingfeng > > --- > > drivers/pci/vgaarb.c | 22 -- > > 1 file changed, 12 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c > > index c1bc6c983932..22a505e877dc 100644 > > --- a/drivers/pci/vgaarb.c > > +++ b/drivers/pci/vgaarb.c > > @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct > > pci_dev *pdev) > > struct pci_dev *bridge; > > u16 cmd; > > > > - /* Only deal with VGA class devices */ > > - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) > > - return false; > > - > Hi, here is probably a bug fixing. > > For an example, nvidia render only GPU typically has 0x0380. > > as its PCI class number, but render only GPU should not participate in > the arbitration. > > As it shouldn't snoop the legacy fixed VGA address. > > It(render only GPU) can not display anything. > > > But 0x0380 >> 8 = 0x03, the filter failed. 
> > > > /* Allocate structure */ > > vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); > > if (vgadev == NULL) { > > @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, > > unsigned long action, > > struct pci_dev *pdev = to_pci_dev(dev); > > bool notify = false; > > > > - vgaarb_dbg(dev, "%s\n", __func__); > > + /* Only deal with VGA class devices */ > > + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) > > + return 0; > So here we only care 0x0300, my initial intent is to make an > optimization, > > nowadays sane display graphic card should all has 0x0300 as its PCI > class number, is this complete right? > > ``` > > #define PCI_BASE_CLASS_DISPLAY0x03 > #define PCI_CLASS_DISPLAY_VGA0x0300 > #define PCI_CLASS_DISPLAY_XGA0x0301 > #define PCI_CLASS_DISPLAY_3D0x0302 > #define PCI_CLASS_DISPLAY_OTHER0x0380 > > ``` > > Any ideas ? > >>> I'm not quite sure what you are asking about here. > >> To be honest, I'm worried about the PCI devices which has a > >> > >> PCI_CLASS_DISPLAY_XGA as its PCI class number. > >> > >> As those devices are very uncommon in the real world. > >> > >> > >> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA" > >> > >> > >> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO, > >> > >> there no code reference this macro. So I think it seems safe to ignore > >> the XGA ? > >> > >> > >> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate > >> the render-only GPU. > >> > >> And render-only GPU can't decode the fixed VGA address space, it is safe > >> to ignore them. > >> > >> > >>>For vga_arb, we > >>> only care about VGA class devices since those should be on the only > >>> ones that might have VGA routed to them. > >>>However, as VGA gets deprecated, > >> We need the vgaarb for a system with multiple video card. 
> >> > >> Not only because some Legacy VGA devices implemented > >> > >> on PCI will typically have the same "hard-decoded" addresses; > >> > >> But also these video card need to participate in the arbitration, > >> > >> determine the default boot device. > > But couldn't the boot device be determined via what whatever resources > > were used by the pre-OS console? > > I don't know what you are refer to by saying pre-OS console, UEFI > SHELL, UEFI GOP or something like that. > Right. Before the OS loads the platform firmware generally sets up something for display. That could be GOP or vesa or some other platform specific protocol. > If you are referring to the framebuffer driver which light up the screen > before the Linux kernel is loaded . > > > Then, what you have said is true, the boot device is determined by the > pre-OS console. > > But the problem is how does the Linux kernel(vgaarb) could know which > one is the default boot device > > on a multiple GPU machine. Relaying on the firmware fb's address and > size is what the mechanism > > we already in
Re: [PATCH] drm/amdgpu: Modify for_each_inst macro
Am 2023-06-16 um 06:23 schrieb Lijo Lazar: Modify it such that it doesn't change the instance mask parameter. Signed-off-by: Lijo Lazar Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4029c13a9be..c5451a9b0ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1295,9 +1295,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask)\ - for (i = ffs(inst_mask) - 1; inst_mask;\ -inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define for_each_inst(i, inst_mask)\ + for (i = ffs(inst_mask); i-- != 0; \ +i = ffs((inst_mask & (~0U << (i + 1))))) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
On 2023/6/16 21:41, Alex Deucher wrote: On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng wrote: Hi, On 2023/6/16 05:11, Alex Deucher wrote: On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng wrote: Hi, On 2023/6/13 11:01, Sui Jingfeng wrote: From: Sui Jingfeng Deal only with the VGA devcie(pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non-PCI display device(pdev->class != 0x0300) out. There no need to process the non-display PCI device. Cc: Bjorn Helgaas Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index c1bc6c983932..22a505e877dc 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - Hi, here is probably a bug fixing. For an example, nvidia render only GPU typically has 0x0380. as its PCI class number, but render only GPU should not participate in the arbitration. As it shouldn't snoop the legacy fixed VGA address. It(render only GPU) can not display anything. But 0x0380 >> 8 = 0x03, the filter failed. /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) + return 0; So here we only care 0x0300, my initial intent is to make an optimization, nowadays sane display graphic card should all has 0x0300 as its PCI class number, is this complete right? 
``` #define PCI_BASE_CLASS_DISPLAY0x03 #define PCI_CLASS_DISPLAY_VGA0x0300 #define PCI_CLASS_DISPLAY_XGA0x0301 #define PCI_CLASS_DISPLAY_3D0x0302 #define PCI_CLASS_DISPLAY_OTHER0x0380 ``` Any ideas ? I'm not quite sure what you are asking about here. To be honest, I'm worried about the PCI devices which has a PCI_CLASS_DISPLAY_XGA as its PCI class number. As those devices are very uncommon in the real world. $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA" Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO, there no code reference this macro. So I think it seems safe to ignore the XGA ? PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate the render-only GPU. And render-only GPU can't decode the fixed VGA address space, it is safe to ignore them. For vga_arb, we only care about VGA class devices since those should be on the only ones that might have VGA routed to them. However, as VGA gets deprecated, We need the vgaarb for a system with multiple video card. Not only because some Legacy VGA devices implemented on PCI will typically have the same "hard-decoded" addresses; But also these video card need to participate in the arbitration, determine the default boot device. But couldn't the boot device be determined via what whatever resources were used by the pre-OS console? I don't know what you are refer to by saying pre-OS console, UEFI SHELL, UEFI GOP or something like that. If you are referring to the framebuffer driver which light up the screen before the Linux kernel is loaded . Then, what you have said is true, the boot device is determined by the pre-OS console. But the problem is how does the Linux kernel(vgaarb) could know which one is the default boot device on a multiple GPU machine. Relaying on the firmware fb's address and size is what the mechanism we already in using. I feel like that should be separate from vgaarb. Emm, this really deserved another patch, please ? 
vgaarb should handle PCI VGA routing and some other mechanism should be used to determine what device provided the pre-OS console. If the new mechanism need the firmware changed, then this probably break the old machine. Also, this probably will get all arch involved. to get the new mechanism supported. The testing pressure and review power needed is quite large. drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and more arch... The reviewing process will became quite difficult then. vgaarb is really what we already in use, and being used more than ten years ... Alex Nowadays, the 'VGA devices' here is stand for the Graphics card which is capable of display something on the screen. We still need vgaarb to select the default boot device. you'll have more non VGA PCI classes for devices which could be the pre-OS console
Re: [PATCH v2] drm/amdgpu: fix clearing mappings for BOs that are always valid in VM
Applied. Thanks! Alex On Fri, Jun 16, 2023 at 9:38 AM Samuel Pitoiset wrote: > > Per VM BOs must be marked as moved or otherwise their ranges are not > updated on use which might be necessary when the replace operation > splits mappings. > > This fixes random GPU hangs when replacing sparse mappings from the > userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs > are correctly handled there. > > Cc: sta...@vger.kernel.org > Signed-off-by: Samuel Pitoiset > Reviewed-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 143d11afe0e5..eff73c428b12 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device > *adev, > > /* Insert partial mapping before the range */ > if (!list_empty(&before->list)) { > + struct amdgpu_bo *bo = before->bo_va->base.bo; > + > amdgpu_vm_it_insert(before, &vm->va); > if (before->flags & AMDGPU_PTE_PRT) > amdgpu_vm_prt_get(adev); > + > + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && > + !before->bo_va->base.moved) > + amdgpu_vm_bo_moved(&before->bo_va->base); > } else { > kfree(before); > } > > /* Insert partial mapping after the range */ > if (!list_empty(&after->list)) { > + struct amdgpu_bo *bo = after->bo_va->base.bo; > + > amdgpu_vm_it_insert(after, &vm->va); > if (after->flags & AMDGPU_PTE_PRT) > amdgpu_vm_prt_get(adev); > + > + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && > + !after->bo_va->base.moved) > + amdgpu_vm_bo_moved(&after->bo_va->base); > } else { > kfree(after); > } > -- > 2.41.0 >
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng wrote: > > Hi, > > On 2023/6/16 05:11, Alex Deucher wrote: > > On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng > > wrote: > >> Hi, > >> > >> On 2023/6/13 11:01, Sui Jingfeng wrote: > >>> From: Sui Jingfeng > >>> > >>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the > >>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display > >>> device(pdev->class != 0x0300) out. There no need to process the > >>> non-display > >>> PCI device. > >>> > >>> Cc: Bjorn Helgaas > >>> Signed-off-by: Sui Jingfeng > >>> --- > >>>drivers/pci/vgaarb.c | 22 -- > >>>1 file changed, 12 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c > >>> index c1bc6c983932..22a505e877dc 100644 > >>> --- a/drivers/pci/vgaarb.c > >>> +++ b/drivers/pci/vgaarb.c > >>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct > >>> pci_dev *pdev) > >>>struct pci_dev *bridge; > >>>u16 cmd; > >>> > >>> - /* Only deal with VGA class devices */ > >>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) > >>> - return false; > >>> - > >> Hi, here is probably a bug fixing. > >> > >> For an example, nvidia render only GPU typically has 0x0380. > >> > >> as its PCI class number, but render only GPU should not participate in > >> the arbitration. > >> > >> As it shouldn't snoop the legacy fixed VGA address. > >> > >> It(render only GPU) can not display anything. > >> > >> > >> But 0x0380 >> 8 = 0x03, the filter failed. 
> >> > >> > >>>/* Allocate structure */ > >>>vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); > >>>if (vgadev == NULL) { > >>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, > >>> unsigned long action, > >>>struct pci_dev *pdev = to_pci_dev(dev); > >>>bool notify = false; > >>> > >>> - vgaarb_dbg(dev, "%s\n", __func__); > >>> + /* Only deal with VGA class devices */ > >>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) > >>> + return 0; > >> So here we only care 0x0300, my initial intent is to make an optimization, > >> > >> nowadays sane display graphic card should all has 0x0300 as its PCI > >> class number, is this complete right? > >> > >> ``` > >> > >> #define PCI_BASE_CLASS_DISPLAY0x03 > >> #define PCI_CLASS_DISPLAY_VGA0x0300 > >> #define PCI_CLASS_DISPLAY_XGA0x0301 > >> #define PCI_CLASS_DISPLAY_3D0x0302 > >> #define PCI_CLASS_DISPLAY_OTHER0x0380 > >> > >> ``` > >> > >> Any ideas ? > > I'm not quite sure what you are asking about here. > > To be honest, I'm worried about the PCI devices which has a > > PCI_CLASS_DISPLAY_XGA as its PCI class number. > > As those devices are very uncommon in the real world. > > > $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA" > > > Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO, > > there no code reference this macro. So I think it seems safe to ignore > the XGA ? > > > PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate > the render-only GPU. > > And render-only GPU can't decode the fixed VGA address space, it is safe > to ignore them. > > > > For vga_arb, we > > only care about VGA class devices since those should be on the only > > ones that might have VGA routed to them. > > > However, as VGA gets deprecated, > > We need the vgaarb for a system with multiple video card. 
> > Not only because some Legacy VGA devices implemented > > on PCI will typically have the same "hard-decoded" addresses; > > But also these video card need to participate in the arbitration, > > determine the default boot device. But couldn't the boot device be determined via what whatever resources were used by the pre-OS console? I feel like that should be separate from vgaarb. vgaarb should handle PCI VGA routing and some other mechanism should be used to determine what device provided the pre-OS console. Alex > > > Nowadays, the 'VGA devices' here is stand for the Graphics card > > which is capable of display something on the screen. > > We still need vgaarb to select the default boot device. > > > > you'll have more non VGA PCI classes for devices which > > could be the pre-OS console device. > > Ah, we still want do this(by applying this patch) first, > > and then we will have the opportunity to see who will crying if > something is broken. Will know more then. > > But drop this patch or revise it with more consideration is also > acceptable. > > > I asking about suggestion and/or review. > > > Alex > > > >>>/* For now we're only intereted in devices added and removed. I > >>> didn't > >>> * test this thing here, so someone needs to double check for the > >>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, > >>> unsigned long action, > >>>else if (action ==
[PATCH v2] drm/amdgpu: fix clearing mappings for BOs that are always valid in VM
Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings. This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there. Cc: sta...@vger.kernel.org Signed-off-by: Samuel Pitoiset Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); } -- 2.41.0
Re: [PATCH] drm/amdgpu: Modify for_each_inst macro
[Public] Acked-by: Alex Deucher From: Lazar, Lijo Sent: Friday, June 16, 2023 6:23 AM To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Kamal, Asad ; Ma, Le Subject: [PATCH] drm/amdgpu: Modify for_each_inst macro Modify it such that it doesn't change the instance mask parameter. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4029c13a9be..c5451a9b0ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1295,9 +1295,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask) \ - for (i = ffs(inst_mask) - 1; inst_mask;\ -inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define for_each_inst(i, inst_mask)\ + for (i = ffs(inst_mask); i-- != 0; \ +i = ffs((inst_mask & (~0U << (i + 1))))) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) -- 2.25.1
Re: [PATCH] drm/amdgpu: enable mcbp by default on gfx9 chips
On Fri, Jun 16, 2023 at 1:07 AM wrote: > > From: Jiadong Zhu > > Gfx9 is using software rings which would trigger mcbp in some cases. > Thus the parameter amdgpu_mcbp shall be enabled by default. > > Signed-off-by: Jiadong Zhu > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 65577eca58f1..1b3cfda946f9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -4527,6 +4527,7 @@ static int gfx_v9_0_early_init(void *handle) > adev->gfx.xcc_mask = 1; > adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), > AMDGPU_MAX_COMPUTE_RINGS); > + amdgpu_mcbp = 1; This will enable it for all chips since it's a global. You'll need to store the state in adev->gfx or something like that and enable it on a per chip basis, or just make it the default everywhere. Alex > gfx_v9_0_set_kiq_pm4_funcs(adev); > gfx_v9_0_set_ring_funcs(adev); > gfx_v9_0_set_irq_funcs(adev); > -- > 2.25.1 >
Re: [PATCH] drm/amdgpu: fix clearing mappings for BOs that are always valid in VM
Am 16.06.23 um 08:27 schrieb Samuel Pitoiset: If the BO has been moved the PT should be updated, otherwise the VAs might point to invalid PT. You might want to update this sentence a bit. Something like: Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings. Apart from that really good catch and the patch is Reviewed-by: Christian König Regards, Christian. This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there. Cc: sta...@vger.kernel.org Signed-off-by: Samuel Pitoiset --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); }
Re: [PATCH] drm/amdgpu: Add missing MODULE_FIRMWARE macro
On Fri, Jun 16, 2023 at 8:11 AM Juerg Haefliger wrote: > > Add the missing MODULE_FIRMWARE macro for "amdgpu/fiji_smc.bin". > > Signed-off-by: Juerg Haefliger > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 5c7d40873ee2..1f83a939d641 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -92,6 +92,7 @@ MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); > MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); > MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); > MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); > +MODULE_FIRMWARE("amdgpu/fiji_smc.bin"); This is already specified in smumgr.c. Alex > > #define AMDGPU_RESUME_MS 2000 > #define AMDGPU_MAX_RETRY_LIMIT 2 > -- > 2.37.2 >
[PATCH] drm/amdgpu: Add missing MODULE_FIRMWARE macro
Add the missing MODULE_FIRMWARE macro for "amdgpu/fiji_smc.bin". Signed-off-by: Juerg Haefliger --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c7d40873ee2..1f83a939d641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -92,6 +92,7 @@ MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/fiji_smc.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 -- 2.37.2
Re: [PATCH] drm/amdgpu: enable mcbp by default on gfx9 chips
Am 16.06.23 um 07:07 schrieb jiadong@amd.com: From: Jiadong Zhu Gfx9 is using software rings which would trigger mcbp in some cases. Thus the parameter amdgpu_mcbp shall be enabled by default. Actually the idea was to not need the amdgpu_mcbp parameter any more and deprecate it with the use of the software rings. Why is that still necessary? Regards, Christian. Signed-off-by: Jiadong Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 65577eca58f1..1b3cfda946f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4527,6 +4527,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); + amdgpu_mcbp = 1; gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev);
Re: [PATCH 2/2] drm/amd/pm: update the LC_L1_INACTIIVY setting to address possible noise issue
Series is: Reviewed-by: Alex Deucher On Thu, Jun 15, 2023 at 9:20 PM Evan Quan wrote: > > It is proved that insufficient LC_L1_INACTIIVY setting can cause audio > noise on some platform. With the LC_L1_INACTIIVY increased to 4ms, the > issue can be resolved. > > Signed-off-by: Evan Quan > --- > drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c > b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c > index 7ba47fc1917b..4038455d7998 100644 > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c > @@ -345,7 +345,7 @@ static void nbio_v2_3_init_registers(struct amdgpu_device > *adev) > } > > #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by > default, no gains over L1 > -#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, > 9=1ms > +#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x0000000A // 1=1us, > 9=1ms, 10=4ms > #define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms > > static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, > -- > 2.34.1 >
[PATCH AUTOSEL 4.14 4/5] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj(robj converted from), robj should not be accessed again after drm_gem_object_put, otherwise it will result in use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac467b80edc7c..59ad0a4e2fd53 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -376,7 +376,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -389,13 +388,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(&rdev->exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put_unlocked(gobj); up_read(&rdev->exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 4.19 5/6] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 27d8e7dd2d067..46f7789693ea0 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -377,7 +377,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -390,13 +389,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put_unlocked(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 5.4 7/8] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b2b076606f54b..e164b3c7a234f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -384,7 +384,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -397,13 +396,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put_unlocked(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 5.10 11/14] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e5c4271e64ede..75053917d2137 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -385,7 +385,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -398,13 +397,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 5.15 12/16] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a36a4f2c76b09..57218263ef3b1 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -456,7 +456,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -469,13 +468,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 6.1 22/26] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 261fcbae88d78..75d79c3110389 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -459,7 +459,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -472,13 +471,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH AUTOSEL 6.3 25/30] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl
From: Min Li [ Upstream commit 982b173a6c6d9472730c3116051977e05d17c8c5 ] Userspace can race to free the gobj (which robj is converted from), so robj should not be accessed again after drm_gem_object_put; otherwise it will result in a use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 261fcbae88d78..75d79c3110389 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -459,7 +459,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -472,13 +471,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(>exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put(gobj); up_read(>exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } -- 2.39.2
[PATCH] drm/amdgpu: Modify for_each_inst macro
Modify it such that it doesn't change the instance mask parameter. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f4029c13a9be..c5451a9b0ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1295,9 +1295,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask) \ - for (i = ffs(inst_mask) - 1; inst_mask;\ -inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define for_each_inst(i, inst_mask)\ + for (i = ffs(inst_mask); i-- != 0; \ +i = ffs((inst_mask & (~0U << (i + 1) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) -- 2.25.1
Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices
Hi, On 2023/6/16 05:11, Alex Deucher wrote: On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng wrote: Hi, On 2023/6/13 11:01, Sui Jingfeng wrote: From: Sui Jingfeng Deal only with the VGA device (pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non-PCI display device (pdev->class != 0x0300) out. There is no need to process the non-display PCI device. Cc: Bjorn Helgaas Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index c1bc6c983932..22a505e877dc 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - Hi, this is probably a bug fix. For example, an NVIDIA render-only GPU typically has 0x0380 as its PCI class number, but a render-only GPU should not participate in the arbitration, as it shouldn't snoop the legacy fixed VGA address. It (the render-only GPU) cannot display anything. But 0x0380 >> 8 = 0x03, the filter failed. /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8) + return 0; So here we only care about 0x0300. My initial intent is to make an optimization; nowadays sane display graphics cards should all have 0x0300 as their PCI class number. Is this completely right? ``` #define PCI_BASE_CLASS_DISPLAY 0x03 #define PCI_CLASS_DISPLAY_VGA 0x0300 #define PCI_CLASS_DISPLAY_XGA 0x0301 #define PCI_CLASS_DISPLAY_3D 0x0302 #define PCI_CLASS_DISPLAY_OTHER 0x0380 ``` Any ideas?
I'm not quite sure what you are asking about here. To be honest, I'm worried about the PCI devices which have PCI_CLASS_DISPLAY_XGA as their PCI class number, as those devices are very uncommon in the real world. $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA" Grepping for "PCI_CLASS_DISPLAY_XGA" in the Linux kernel tree returns ZERO matches; no code references this macro. So I think it seems safe to ignore the XGA? PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate the render-only GPU. And since a render-only GPU can't decode the fixed VGA address space, it is safe to ignore them. For vga_arb, we only care about VGA class devices since those should be the only ones that might have VGA routed to them. However, as VGA gets deprecated, We need the vgaarb for a system with multiple video cards. Not only because some legacy VGA devices implemented on PCI will typically have the same "hard-decoded" addresses, but also because these video cards need to participate in the arbitration to determine the default boot device. Nowadays, 'VGA devices' here stands for graphics cards which are capable of displaying something on the screen. We still need vgaarb to select the default boot device. you'll have more non VGA PCI classes for devices which could be the pre-OS console device. Ah, we still want to do this (by applying this patch) first, and then we will have the opportunity to see who will cry if something is broken. Will know more then. But dropping this patch or revising it with more consideration is also acceptable. I am asking for suggestions and/or review. Alex /* For now we're only intereted in devices added and removed. 
I didn't * test this thing here, so someone needs to double check for the @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); + if (notify) vga_arbiter_notify_clients(); return 0; @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = { static int __init vga_arb_device_init(void) { + struct pci_dev *pdev = NULL; int rc; - struct pci_dev *pdev; rc = misc_register(_arb_device); if (rc < 0) @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) /* We add all PCI devices satisfying VGA class in the arbiter by * default */ - pdev = NULL; - while ((pdev = - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, -PCI_ANY_ID, pdev)) != NULL) + while (1) { + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8,
[PATCH V3 6/7] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0
Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 3 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 +++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 60 +++ 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index aa63cc43d41c..a8a4be32cc59 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -323,6 +323,7 @@ enum smu_table_id SMU_TABLE_PACE, SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, + SMU_TABLE_WIFIBAND, SMU_TABLE_COUNT, }; @@ -1496,6 +1497,8 @@ enum smu_baco_seq { __dst_size); \ }) +#define HZ_IN_MHZ 100U + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 297b70b9388f..5bbb60289a79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -245,7 +245,8 @@ __SMU_DUMMY_MAP(AllowGpo), \ __SMU_DUMMY_MAP(Mode2Reset),\ __SMU_DUMMY_MAP(RequestI2cTransaction), \ - __SMU_DUMMY_MAP(GetMetricsTable), + __SMU_DUMMY_MAP(GetMetricsTable), \ + __SMU_DUMMY_MAP(EnableUCLKShadow), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index df3baaab0037..b6fae9b92303 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -303,5 +303,8 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, uint32_t *size, uint32_t pptable_id); +int smu_v13_0_enable_uclk_shadow(struct 
smu_context *smu, +bool enablement); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 393c6a7b9609..8c2230d1d862 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2453,3 +2453,12 @@ int smu_v13_0_mode1_reset(struct smu_context *smu) return ret; } + +int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, +bool enablement) +{ + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_EnableUCLKShadow, + enablement, + NULL); +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 09405ef1e3c8..cf75feaee779 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -155,6 +155,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0), MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -235,6 +236,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, + TAB_MAP(WIFIBAND), TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), }; @@ -472,6 +474,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -2112,6 
+2117,58 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, return ret; } +static bool smu_v13_0_0_wbrf_support_check(struct smu_context
[PATCH V3 7/7] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7
Fulfill the SMU13.0.7 support for Wifi RFI mitigation feature. Signed-off-by: Evan Quan --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 59 +++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 98a33f8ee209..16c1c04e2034 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -125,6 +125,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -205,6 +206,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, + TAB_MAP(WIFIBAND), }; static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -487,6 +489,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu) AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -1721,6 +1726,57 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu, NULL); } +static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) +{ + return smu->smc_fw_version > 0x00524600; +} + +static int smu_v13_0_7_set_wbrf_exclusion_ranges(struct smu_context *smu, +struct exclusion_range *exclusion_ranges) +{ + WifiBandEntryTable_t wifi_bands; + int valid_entries = 0; + int ret, i; + + 
memset(_bands, 0, sizeof(wifi_bands)); + for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) { + if (!exclusion_ranges[i].start && + !exclusion_ranges[i].end) + break; + + /* PMFW expects the inputs to be in Mhz unit */ + wifi_bands.WifiBandEntry[valid_entries].LowFreq = + DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_IN_MHZ); + wifi_bands.WifiBandEntry[valid_entries++].HighFreq = + DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_IN_MHZ); + } + wifi_bands.WifiBandEntryNum = valid_entries; + + /* +* Per confirm with PMFW team, WifiBandEntryNum = 0 is a valid setting. +* Considering the scenarios below: +* - At first the wifi device adds an exclusion range e.g. (2400,2500) to +* BIOS and our driver gets notified. We will set WifiBandEntryNum = 1 +* and pass the WifiBandEntry (2400, 2500) to PMFW. +* +* - Later the wifi device removes the wifiband list added above and +* our driver gets notified again. At this time, driver will set +* WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW. +* - PMFW may still need to do some uclk shadow update(e.g. switching +* from shadow clock back to primary clock) on receiving this. +*/ + + ret = smu_cmn_update_table(smu, + SMU_TABLE_WIFIBAND, + 0, + (void *)(_bands), + true); + if (ret) + dev_err(smu->adev->dev, "Failed to set wifiband!"); + + return ret; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -1786,6 +1842,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, .gpo_control = smu_v13_0_gpo_control, + .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, + .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, + .set_wbrf_exclusion_ranges = smu_v13_0_7_set_wbrf_exclusion_ranges, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) -- 2.34.1
[PATCH V3 5/7] drm/amd/pm: add flood detection for wbrf events
To protect PMFW from being overloaded. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 28 --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 7 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 89f876cc60e6..2619e310ef54 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1272,6 +1272,22 @@ static void smu_wbrf_event_handler(struct amdgpu_device *adev) { struct smu_context *smu = adev->powerplay.pp_handle; + schedule_delayed_work(>wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); +} + +/** + * smu_wbrf_delayed_work_handler - callback on delayed work timer expired + * + * @work: struct work_struct pointer + * + * Flood is over and driver will consume the latest exclusion ranges. + */ +static void smu_wbrf_delayed_work_handler(struct work_struct *work) +{ + struct smu_context *smu = + container_of(work, struct smu_context, wbrf_delayed_work.work); + smu_wbrf_handle_exclusion_ranges(smu); } @@ -1311,6 +1327,9 @@ static int smu_wbrf_init(struct smu_context *smu) if (!smu->wbrf_supported) return 0; + INIT_DELAYED_WORK(>wbrf_delayed_work, + smu_wbrf_delayed_work_handler); + ret = amdgpu_acpi_register_wbrf_notify_handler(adev, smu_wbrf_event_handler); if (ret) @@ -1321,11 +1340,10 @@ static int smu_wbrf_init(struct smu_context *smu) * before our driver loaded. To make sure our driver * is awared of those exclusion ranges. 
*/ - ret = smu_wbrf_handle_exclusion_ranges(smu); - if (ret) - dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n"); + schedule_delayed_work(>wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); - return ret; + return 0; } /** @@ -1343,6 +1361,8 @@ static void smu_wbrf_fini(struct smu_context *smu) return; amdgpu_acpi_unregister_wbrf_notify_handler(adev); + + cancel_delayed_work_sync(>wbrf_delayed_work); } static int smu_smc_hw_setup(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index ff0af3da0be2..aa63cc43d41c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -478,6 +478,12 @@ struct stb_context { #define WORKLOAD_POLICY_MAX 7 +/* + * Configure wbrf event handling pace as there can be only one + * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms. + */ +#define SMU_WBRF_EVENT_HANDLING_PACE 10 + struct smu_context { struct amdgpu_device*adev; @@ -576,6 +582,7 @@ struct smu_context /* data structures for wbrf feature support */ boolwbrf_supported; + struct delayed_work wbrf_delayed_work; }; struct i2c_adapter; -- 2.34.1
[PATCH V3 4/7] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature
With WBRF feature supported, as a driver responding to the frequencies, amdgpu driver is able to do shadow pstate switching to mitigate possible interference(between its (G-)DDR memory clocks and local radio module frequency bands used by Wifi 6/6e/7). To make WBRF feature functional, the kernel needs to be configured with CONFIG_ACPI_WBRF and the platform is equipped with necessary ACPI based mechanism to get amdgpu driver notified. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 63 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 19 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 184 ++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 20 ++ drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 + 6 files changed, 315 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e39..2f2ec64ed1b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -241,6 +242,7 @@ extern int amdgpu_num_kcq; #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024) extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern int amdgpu_wbrf; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD(256*1024*1024) @@ -741,6 +743,9 @@ struct amdgpu_reset_domain; */ #define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size) +typedef +void (*wbrf_notify_handler) (struct amdgpu_device *adev); + struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1050,6 +1055,8 @@ struct amdgpu_device { booljob_hang; booldc_enabled; + + wbrf_notify_handler wbrf_event_handler; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1381,6 +1388,25 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) { return 0; } #endif +#if defined(CONFIG_ACPI_WBRF) +bool amdgpu_acpi_is_wbrf_supported(struct 
amdgpu_device *adev); +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out); +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler); +int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev); +#else +static inline bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev) { return false; } +static inline +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out) { return 0; } +static inline +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler) { return 0; } +static inline +int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev) { return 0; } +#endif + #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index aeeec211861c..efbe6dd91d1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1105,3 +1105,66 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) } #endif /* CONFIG_SUSPEND */ + +#ifdef CONFIG_ACPI_WBRF +bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev) +{ + struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev); + + if (!acpi_dev) + return false; + + return wbrf_supported_consumer(acpi_dev); +} + +int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev, +struct wbrf_ranges_out *exclusions_out) +{ + struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev); + + if (!acpi_dev) + return -ENODEV; + + return wbrf_retrieve_exclusions(acpi_dev, exclusions_out); +} + +#define CPM_GPU_NOTIFY_COMMAND 0x55 +static void amdgpu_acpi_wbrf_event(acpi_handle handle, u32 event, void *data) +{ + struct amdgpu_device *adev = (struct 
amdgpu_device *)data; + + if (event == CPM_GPU_NOTIFY_COMMAND && + adev->wbrf_event_handler) + adev->wbrf_event_handler(adev); +} + +int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev, +wbrf_notify_handler handler) +{ + struct acpi_handle *acpi_hdler = ACPI_HANDLE(adev->dev);
[PATCH V3 3/7] drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature
Add those data structures to support Wifi RFI mitigation feature. Signed-off-by: Evan Quan --- .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 14 +- .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 14 +- .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h | 3 ++- .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h | 3 ++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index b686fb68a6e7..d64188fb5839 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -388,6 +388,17 @@ typedef struct { EccInfo_t EccInfo[24]; } EccInfoTable_t; +typedef struct { + uint16_t LowFreq; + uint16_t HighFreq; +} WifiOneBand_t; + +typedef struct { + uint32_t WifiBandEntryNum; + WifiOneBand_tWifiBandEntry[11]; + uint32_t MmHubPadding[8]; +} WifiBandEntryTable_t; + //D3HOT sequences typedef enum { BACO_SEQUENCE, @@ -1592,7 +1603,8 @@ typedef struct { #define TABLE_I2C_COMMANDS9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 4c46a0392451..77483e8485e7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -392,6 +392,17 @@ typedef struct { EccInfo_t EccInfo[24]; } EccInfoTable_t; +typedef struct { + uint16_t LowFreq; + uint16_t HighFreq; +} WifiOneBand_t; + +typedef struct { + uint32_t WifiBandEntryNum; + WifiOneBand_tWifiBandEntry[11]; + uint32_t MmHubPadding[8]; +} WifiBandEntryTable_t; + //D3HOT sequences typedef enum { BACO_SEQUENCE, @@ -1624,7 
+1635,8 @@ typedef struct { #define TABLE_I2C_COMMANDS9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index 10cff75b44d5..c98cc32d11bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -138,7 +138,8 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 //Debug Dump Message #define DEBUGSMC_MSG_TestMessage0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h index 6aaefca9b595..a6bf9cdd130e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h @@ -134,6 +134,7 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 #endif -- 2.34.1
[PATCH V3 2/7] wifi: mac80211: Add support for ACPI WBRF
From: Mario Limonciello To support AMD's WBRF interference mitigation mechanism, Wifi adapters utilized in the system must register the frequencies in use (or unregister those frequencies no longer used) via the dedicated ACPI calls, so that other drivers responding to the frequencies can take proper actions to mitigate possible interference. To make the WBRF feature functional, the kernel needs to be configured with CONFIG_ACPI_WBRF and the platform must be equipped with WBRF support (from BIOS and drivers). Signed-off-by: Mario Limonciello Co-developed-by: Evan Quan Signed-off-by: Evan Quan -- v1->v2: - place the new added member(`wbrf_supported`) in ieee80211_local(Johannes) - handle chandefs change scenario properly(Johannes) - some minor fixes around code sharing and possible invalid input checks(Johannes) --- include/net/cfg80211.h | 8 +++ net/mac80211/Makefile | 2 + net/mac80211/chan.c| 11 +++ net/mac80211/ieee80211_i.h | 19 + net/mac80211/main.c| 2 + net/mac80211/wbrf.c| 137 + net/wireless/chan.c| 3 +- 7 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 net/mac80211/wbrf.c diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9e04f69712b1..c6dc337eafce 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -920,6 +920,14 @@ const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); +/** + * nl80211_chan_width_to_mhz - get the channel width in Mhz + * @chan_width: the channel width from nl80211_chan_width + * Return: channel width in Mhz if the chan_width from nl80211_chan_width + * is valid. -1 otherwise. 
+ */ +int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width); + /** * cfg80211_chandef_valid - check if a channel definition is valid * @chandef: the channel definition to check diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index b8de44da1fb8..709eb678f42a 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \ mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +mac80211-$(CONFIG_ACPI_WBRF) += wbrf.o + ccflags-y += -DDEBUG diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 77c90ed8f5d7..0c5289a9aa6c 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -506,11 +506,16 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, WARN_ON(!cfg80211_chandef_compatible(>conf.def, chandef)); + ieee80211_remove_wbrf(local, >conf.def); + ctx->conf.def = *chandef; /* check if min chanctx also changed */ changed = IEEE80211_CHANCTX_CHANGE_WIDTH | _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); + + ieee80211_add_wbrf(local, >conf.def); + drv_change_chanctx(local, ctx, changed); if (!local->use_chanctx) { @@ -668,6 +673,10 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, lockdep_assert_held(>mtx); lockdep_assert_held(>chanctx_mtx); + err = ieee80211_add_wbrf(local, >conf.def); + if (err) + return err; + if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; @@ -748,6 +757,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, } ieee80211_recalc_idle(local); + + ieee80211_remove_wbrf(local, >conf.def); } static void ieee80211_free_chanctx(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b0372e76f373..f832de16073b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1591,6 +1591,10 @@ struct ieee80211_local { /* extended capabilities provided by mac80211 */ u8 ext_capa[8]; + +#ifdef 
CONFIG_ACPI_WBRF + bool wbrf_supported; +#endif }; static inline struct ieee80211_sub_if_data * @@ -2615,4 +2619,19 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct link_sta_info *link_sta); + +#ifdef CONFIG_ACPI_WBRF +void ieee80211_check_wbrf_support(struct ieee80211_local *local); +int ieee80211_add_wbrf(struct ieee80211_local *local, + struct cfg80211_chan_def *chandef); +void ieee80211_remove_wbrf(struct ieee80211_local *local, + struct cfg80211_chan_def *chandef); +#else +static inline void ieee80211_check_wbrf_support(struct ieee80211_local *local) { } +static inline int ieee80211_add_wbrf(struct ieee80211_local *local, +struct cfg80211_chan_def *chandef) {
[PATCH V3 1/7] drivers/acpi: Add support for Wifi band RF mitigations
From: Mario Limonciello Due to electrical and mechanical constraints in certain platform designs, there is likely to be interference of relatively high-powered harmonics of the (G-)DDR memory clocks with local radio module frequency bands used by Wifi 6/6e/7. To mitigate this, AMD has introduced an ACPI based mechanism that devices can use to notify active use of particular frequencies so that devices can make relative internal adjustments as necessary to avoid this resonance. In order for a device to support this, the expected flow for device drivers or subsystems is as follows: Drivers/subsystems contributing frequencies: 1) During probe, check `wbrf_supported_producer` to see if WBRF is supported for the device. 2) If adding frequencies, then call `wbrf_add_exclusion` with the start and end ranges of the frequencies. 3) If removing frequencies, then call `wbrf_remove_exclusion` with start and end ranges of the frequencies. Drivers/subsystems responding to frequencies: 1) During probe, check `wbrf_supported_consumer` to see if WBRF is supported for the device. 2) Call the `wbrf_retrieve_exclusions` to retrieve the current exclusions on receiving an ACPI notification for a new frequency change. Signed-off-by: Mario Limonciello Co-developed-by: Evan Quan Signed-off-by: Evan Quan -- v1->v2: - move those wlan specific implementations to net/mac80211(Mario) --- drivers/acpi/Kconfig | 7 ++ drivers/acpi/Makefile| 2 + drivers/acpi/acpi_wbrf.c | 215 +++ include/linux/wbrf.h | 55 ++ 4 files changed, 279 insertions(+) create mode 100644 drivers/acpi/acpi_wbrf.c create mode 100644 include/linux/wbrf.h diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index ccbeab9500ec..9ee7c7dcc3e6 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -611,3 +611,10 @@ config X86_PM_TIMER You should nearly always say Y here because many modern systems require this timer. 
+ +config ACPI_WBRF + bool "ACPI Wifi band RF mitigation mechanism" + help + Wifi band RF mitigation mechanism allows multiple drivers from + different domains to notify the frequencies in use so that hardware + can be reconfigured to avoid harmonic conflicts. \ No newline at end of file diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index feb36c0b9446..be173e76aa62 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -131,3 +131,5 @@ obj-y += dptf/ obj-$(CONFIG_ARM64)+= arm64/ obj-$(CONFIG_ACPI_VIOT)+= viot.o + +obj-$(CONFIG_ACPI_WBRF)+= acpi_wbrf.o \ No newline at end of file diff --git a/drivers/acpi/acpi_wbrf.c b/drivers/acpi/acpi_wbrf.c new file mode 100644 index ..8c275998ac29 --- /dev/null +++ b/drivers/acpi/acpi_wbrf.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Wifi Band Exclusion Interface + * Copyright (C) 2023 Advanced Micro Devices + * + */ + +#include + +/* functions */ +#define WBRF_RECORD0x1 +#define WBRF_RETRIEVE 0x2 + +/* record actions */ +#define WBRF_RECORD_ADD0x0 +#define WBRF_RECORD_REMOVE 0x1 + +#define WBRF_REVISION 0x1 + +static const guid_t wifi_acpi_dsm_guid = + GUID_INIT(0x7b7656cf, 0xdc3d, 0x4c1c, + 0x83, 0xe9, 0x66, 0xe7, 0x21, 0xde, 0x30, 0x70); + +static int wbrf_dsm(struct acpi_device *adev, u8 fn, + union acpi_object *argv4, + union acpi_object **out) +{ + union acpi_object *obj; + int rc; + + obj = acpi_evaluate_dsm(adev->handle, _acpi_dsm_guid, + WBRF_REVISION, fn, argv4); + if (!obj) + return -ENXIO; + + switch (obj->type) { + case ACPI_TYPE_BUFFER: + if (!*out) { + rc = -EINVAL; + break; + } + *out = obj; + return 0; + + case ACPI_TYPE_INTEGER: + rc = obj->integer.value ? 
-EINVAL : 0; + break; + default: + rc = -EOPNOTSUPP; + } + ACPI_FREE(obj); + + return rc; +} + +static int wbrf_record(struct acpi_device *adev, uint8_t action, + struct wbrf_ranges_in *in) +{ + union acpi_object *argv4; + uint32_t num_of_ranges = 0; + uint32_t arg_idx = 0; + uint32_t loop_idx; + int ret; + + if (!in) + return -EINVAL; + + for (loop_idx = 0; loop_idx < ARRAY_SIZE(in->band_list); +loop_idx++) + if (in->band_list[loop_idx].start && + in->band_list[loop_idx].end) + num_of_ranges++; + + argv4 = kzalloc(sizeof(*argv4) * (2 * num_of_ranges + 2 + 1), GFP_KERNEL); + if (!argv4) + return -ENOMEM; + + argv4[arg_idx].package.type =
[PATCH V3 0/7] Support Wifi RFI interference mitigation feature
Due to electrical and mechanical constraints in certain platform designs, there is likely to be interference of relatively high-powered harmonics of the (G-)DDR memory clocks with local radio module frequency bands used by Wifi 6/6e/7. To mitigate possible RF interference, producers can advertise the frequencies in use and consumers can use this information to avoid using these frequencies for sensitive features. The whole patch set is based on 6.4-rc3. A brief overview of the patches follows: Patch1: Core ACPI interfaces needed to support WBRF feature. Patch2: Enable WBRF support for wifi subsystem. Patch3 - 7: Enable WBRF support for AMD graphics driver. Evan Quan (5): drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature drm/amd/pm: setup the framework to support Wifi RFI mitigation feature drm/amd/pm: add flood detection for wbrf events drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0 drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7 Mario Limonciello (2): drivers/acpi: Add support for Wifi band RF mitigations wifi: mac80211: Add support for ACPI WBRF drivers/acpi/Kconfig | 7 + drivers/acpi/Makefile | 2 + drivers/acpi/acpi_wbrf.c | 215 ++ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 63 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 19 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 204 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 30 +++ .../inc/pmfw_if/smu13_driver_if_v13_0_0.h | 14 +- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 14 +- .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h | 3 +- .../pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h | 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 60 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 59 + drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 + include/linux/wbrf.h | 55 + 
include/net/cfg80211.h| 8 + net/mac80211/Makefile | 2 + net/mac80211/chan.c | 11 + net/mac80211/ieee80211_i.h| 19 ++ net/mac80211/main.c | 2 + net/mac80211/wbrf.c | 137 +++ net/wireless/chan.c | 3 +- 26 files changed, 968 insertions(+), 6 deletions(-) create mode 100644 drivers/acpi/acpi_wbrf.c create mode 100644 include/linux/wbrf.h create mode 100644 net/mac80211/wbrf.c -- 2.34.1
[PATCH] drm/amdgpu: fix clearing mappings for BOs that are always valid in VM
If the BO has been moved, the PT should be updated; otherwise the VAs might point to an invalid PT. This fixes random GPU hangs when replacing sparse mappings from userspace, while OP_MAP/OP_UNMAP works fine because always-valid BOs are correctly handled there. Cc: sta...@vger.kernel.org Signed-off-by: Samuel Pitoiset --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(>list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, >va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(>bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(>list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, >va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(>bo_va->base); } else { kfree(after); } -- 2.41.0