[PATCH 1/2] drm/amdgpu/pm: correct the firmware flag address for SMU IP v13.0.4
For SMU IP v13.0.4, the smnMP1_FIRMWARE_FLAGS address is different, we need this to correct the reading address. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index e3454a876cac..43de0a8d4bd9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -43,6 +43,7 @@ /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 +#define smnMP1_V13_0_4_FIRMWARE_FLAGS 0x3010028 #define smnMP0_FW_INTF 0x30101c0 #define smnMP1_PUB_CTRL 0x3010b14 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index f9c36d294448..fba0b87d01fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -273,8 +273,16 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t mp1_fw_flags; - mp1_fw_flags = RREG32_PCIE(MP1_Public | - (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 4): + mp1_fw_flags = RREG32_PCIE(MP1_Public | + (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff)); + break; + default: + mp1_fw_flags = RREG32_PCIE(MP1_Public | + (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); + break; + } if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) -- 2.25.1
[PATCH 2/2] drm/amdgpu/pm: remove the repeated EnableGfxImu message sending
The EnableGfxImu message will be issued in the set_gfx_power_up_by_imu. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c| 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 196670345552..82d3718d8324 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -219,15 +219,10 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) { struct amdgpu_device *adev = smu->adev; int ret = 0; - /* SMU fw need this message to trigger IMU to complete the initialization */ - if (en) - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxImu, NULL); - else { - if (!adev->in_s0ix) - ret = smu_cmn_send_smc_msg(smu, - SMU_MSG_PrepareMp1ForUnload, - NULL); - } + + if (!en && !adev->in_s0ix) + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + return ret; } -- 2.25.1
Re: [PATCH 1/8] drm: execution context for GEM buffers v2
On 2022-05-04 03:47, Christian König wrote: This adds the infrastructure for an execution context for GEM buffers which is similar to the existinc TTMs execbuf util and intended to replace it in the long term. The basic functionality is that we abstracts the necessary loop to lock many different GEM buffers with automated deadlock and duplicate handling. v2: drop xarray and use dynamic resized array instead, the locking overhead is unecessary and measureable. Signed-off-by: Christian König I finally got around to catching up with this thread. See some questions and comments inline. --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig | 7 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/drm_exec.c | 295 +++ include/drm/drm_exec.h | 144 + 5 files changed, 460 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index f32ccce5722d..bf7dd2a78e9b 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -493,6 +493,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e88c497fa010..1b35c10df263 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -179,6 +179,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. 
+config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM @@ -252,6 +258,7 @@ config DRM_AMDGPU select DRM_SCHED select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC select POWER_SUPPLY select HWMON select BACKLIGHT_CLASS_DEVICE diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 15fe3163f822..ee8573b683f3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -37,6 +37,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..ed2106c22786 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. + * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(, true); + * drm_exec_while_not_all_locked() { + * ret = drm_exec_prepare_obj(, boA, 1); + * drm_exec_continue_on_contention(); + * if (ret) + * goto error; + * + * ret = drm_exec_lock(, boB, 1); Where is drm_exec_lock? It's not in this patch. 
+ * drm_exec_continue_on_contention(); + * if (ret) + * goto error; + * } + * + * drm_exec_for_each_locked_object(, index, obj) { + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); + * ... + * } + * drm_exec_fini(); + * + * See struct dma_exec for more details. + */ + +/* Dummy value used to initially enter the retry loop */ +#define DRM_EXEC_DUMMY (void*)~0 + +/* Initialize the drm_exec_objects container */ +static void drm_exec_objects_init(struct drm_exec_objects *container) +{ + container->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); Should this be kvmalloc? You're using kvrealloc and kvfree elsewhere. + + /* If allocation here fails, just delay that till the first use */ + container->max_objects = container->objects ? + PAGE_SIZE / sizeof(void *) : 0; + container->num_objects = 0; +} + +/* Cleanup the drm_exec_objects container */ +static void
Re: [PATCH 3/8] drm/amdkfd: switch over to using drm_exec
On 2022-05-04 03:47, Christian König wrote: Avoids quite a bit of logic and kmalloc overhead. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 5 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 303 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 14 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 32 +- 5 files changed, 152 insertions(+), 205 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4cb14c2fe53f..3f3a994c68e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -25,12 +25,12 @@ #ifndef AMDGPU_AMDKFD_H_INCLUDED #define AMDGPU_AMDKFD_H_INCLUDED +#include #include #include #include #include #include -#include #include "amdgpu_sync.h" #include "amdgpu_vm.h" @@ -66,8 +66,7 @@ struct kgd_mem { struct dma_buf *dmabuf; struct list_head attachments; /* protected by amdkfd_process_info.lock */ - struct ttm_validate_buffer validate_list; - struct ttm_validate_buffer resv_list; + struct list_head validate_list; uint32_t domain; unsigned int mapped_to_gpu_memory; uint64_t va; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3dc5ab2764ff..64ac4f8f49be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,8 @@ #include #include +#include + #include "amdgpu_object.h" #include "amdgpu_gem.h" #include "amdgpu_vm.h" @@ -770,28 +772,19 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdkfd_process_info *process_info, bool userptr) { - struct ttm_validate_buffer *entry = >validate_list; - struct amdgpu_bo *bo = mem->bo; - - INIT_LIST_HEAD(>head); - entry->num_shared = 1; - entry->bo = >tbo; - mutex_lock(_info->lock); You removed mutex_lock, but left mutex_unlock below. Other than that, this patch looks reasonable. 
But my eyes may have glazed over with this much churn. Regards, Felix if (userptr) - list_add_tail(>head, _info->userptr_valid_list); + list_add_tail(>validate_list, + _info->userptr_valid_list); else - list_add_tail(>head, _info->kfd_bo_list); + list_add_tail(>validate_list, _info->kfd_bo_list); mutex_unlock(_info->lock); } static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, struct amdkfd_process_info *process_info) { - struct ttm_validate_buffer *bo_list_entry; - - bo_list_entry = >validate_list; mutex_lock(_info->lock); - list_del(_list_entry->head); + list_del(>validate_list); mutex_unlock(_info->lock); } @@ -875,13 +868,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, * object can track VM updates. */ struct bo_vm_reservation_context { - struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ - unsigned int n_vms; /* Number of VMs reserved */ - struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ - struct ww_acquire_ctx ticket; /* Reservation ticket */ - struct list_head list, duplicates; /* BO lists */ - struct amdgpu_sync *sync; /* Pointer to sync object */ - bool reserved; /* Whether BOs are reserved */ + /* DRM execution context for the reservation */ + struct drm_exec exec; + /* Number of VMs reserved */ + unsigned int n_vms; + /* Pointer to sync object */ + struct amdgpu_sync *sync; }; enum bo_vm_match { @@ -905,35 +897,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, WARN_ON(!vm); - ctx->reserved = false; ctx->n_vms = 1; ctx->sync = >sync; - - INIT_LIST_HEAD(>list); - INIT_LIST_HEAD(>duplicates); - - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); - if (!ctx->vm_pd) - return -ENOMEM; - - ctx->kfd_bo.priority = 0; - ctx->kfd_bo.tv.bo = >tbo; - ctx->kfd_bo.tv.num_shared = 1; - list_add(>kfd_bo.tv.head, >list); - - amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); - - ret = ttm_eu_reserve_buffers(>ticket, >list, -false, >duplicates); - if (ret) { - pr_err("Failed to 
reserve buffers in ttm.\n"); - kfree(ctx->vm_pd); - ctx->vm_pd = NULL; - return ret; + drm_exec_init(&ctx->exec, true); +
Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero
On 2022-06-08 16:03, Errabolu, Ramesh wrote: [AMD Official Use Only - General] My response is inline. Regards, Ramesh -Original Message- From: Kuehling, Felix Sent: Thursday, June 9, 2022 1:10 AM To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh Subject: Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero On 2022-06-08 07:51, Ramesh Errabolu wrote: In existing code MMIO and DOORBELL BOs are unpinned without ensuring the condition that their map count has reached zero. Unpinning without checking this constraint could lead to an error while BO is being freed. The patch fixes this issue. Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a1de900ba677..e5dc94b745b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(>lock); - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ - if (mem->alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | -KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); - } - mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(>lock); @@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Make sure restore workers don't access the BO any more */ bo_list_entry = >validate_list; mutex_lock(_info->lock); - list_del(_list_entry->head); + list_del_init(_list_entry->head); Is this an unrelated fix? What is this needed for? I vaguely remember discussing this before, but can't remember the reason. Ramesh: This fix is unrelated to P2P work. I brought this issue to attention while working on IOMMU support on DKMS branch. 
Basically a user could call free() before the map count goes to zero. The patch is trying fix that. I get that, but I couldn't remember why I suggested list_del_init here. It has nothing to do with unpinning of BOs. Now I recall that it had something to do with restarting the ioctl after it was interrupted by a signal. reserve_bo_and_cond_vms can fail with -ERESTARTSYS. In that case the ioctl is reentered. We need to make sure it doesn't crash the second time around. list_del will remove bo_list_entry from the list but leave the pointers dangling. The second time around it will probably cause corruption or an oops. Using list_del_init avoids that by initializing the prev and next pointers to NULL. See one more little fix below. Regards, Felix mutex_unlock(_info->lock); /* No more MMU notifiers */ @@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ret = unreserve_bo_and_vms(, false, false); This unreserve_bo_and_vms call cannot fail because the wait parameter is false. If it did fail, the error handling would be broken. I'd add a WARN_ONCE to make that assumption explicit, and change the return at the end of this function to return 0. Basically, if we got this far, we are not turning back, and we should return success. You could update the commit headline to be more general. Something like: Fix error handling in amdgpu_amdkfd_gpuvm_free_memory_of_gpu. Regards, Felix + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | +KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + /* Free the sync object */ amdgpu_sync_free(>sync);
[pull] amdgpu, amdkfd drm-fixes-5.19
Hi Dave, Daniel, Fixes for 5.19. The following changes since commit bf23729c7a5f44f0e863666b9364a64741fd3241: Merge tag 'amd-drm-next-5.19-2022-05-26-2' of https://gitlab.freedesktop.org/agd5f/linux into drm-next (2022-06-01 10:56:11 +1000) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-fixes-5.19-2022-06-08 for you to fetch changes up to 431d071286524bd4f9ba2e46b1be87b479220174: drm/amdgpu/mes: only invalid/prime icache when finish loading both pipe MES FWs. (2022-06-08 15:39:16 -0400) amd-drm-fixes-5.19-2022-06-08: amdgpu: - DCN 3.1 golden settings fix - eDP fixes - DMCUB fixes - GFX11 fixes and cleanups - VCN fix for yellow carp - GMC11 fixes - RAS fixes - GPUVM TLB flush fixes - SMU13 fixes - VCN3 AV1 regression fix - VCN2 JPEG fix - Other misc fixes amdkfd: - MMU notifier fix - Support for more GC 10.3.x families - Pinned BO handling fix - Partial migration bug fix Alex Deucher (1): drm/amdgpu: update VCN codec support for Yellow Carp Alvin (1): drm/amd/display: Don't clear ref_dtbclk value Aric Cyr (1): drm/amd/display: 3.2.187 Aurabindo Pillai (1): drm/amd/display: remove stale config guards Candice Li (1): drm/amdgpu: Resolve RAS GFX error count issue after cold boot on Arcturus Christian König (2): drm/amdgpu: fix limiting AV1 to the first instance on VCN3 drm/amdgpu: always flush the TLB on gfx8 Evan Quan (2): drm/amd/pm: suppress compile warnings about possible unaligned accesses drm/amdgpu: suppress the compile warning about 64 bit type Guchun Chen (1): Revert "drm/amdgpu: Ensure the DMA engine is deactivated during set ups" Hung, Cruise (1): drm/amd/display: Fix DMUB outbox trace in S4 (#4465) Ilya (1): drm/amd/display: Fix possible infinite loop in DP LT fallback Jesse Zhang (1): drm/amdkfd:Fix fw version for 10.3.6 Jiapeng Chong (1): drm/amdgpu: make program_imu_rlc_ram static Joseph Greathouse (1): drm/amdgpu: Add MODE register to wave debug info in gfx11 Lang Yu (1): drm/amdkfd: add pinned BOs to 
kfd_bo_list Leung, Martin (1): drm/amd/display: revert Blank eDP on disable/enable drv Mario Limonciello (1): drm/amdkfd: Add GC 10.3.6 and 10.3.7 KFD definitions Mohammad Zafar Ziya (1): drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit Nicholas Kazlauskas (2): drm/amd/display: Pass the new context into disable OTG WA Revert "drm/amd/display: Pass the new context into disable OTG WA" Philip Yang (3): drm/amdkfd: Use mmget_not_zero in MMU notifier drm/amdgpu: Update PDEs flush TLB if PTB/PDB moved drm/amdkfd: Fix partial migration bugs Roman Li (1): drm/amdgpu: fix aper_base for APU Sherry Wang (1): drm/amd/display: Read Golden Settings Table from VBIOS Stanley.Yang (1): drm/amdgpu: fix ras supported check Sunil Khatri (1): drm/amdgpu: enable tmz by default for GC 10.3.7 Yifan Zhang (1): drm/amdgpu/mes: only invalid/prime icache when finish loading both pipe MES FWs. hengzhou (1): drm/amd/display: Wait DMCUB to idle state before reset. sunliming (2): drm/amdgpu: fix a missing break in gfx_v11_0_handle_priv_fault drm/amdgpu: make gfx_v11_0_rlc_stop static drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 32 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 6 ++ drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 36 --- drivers/gpu/drm/amd/amdgpu/nv.c| 1 + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 109 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 ++-- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_device.c| 18 +++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 +- .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 11 ++- 
.../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h | 2 + .../amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 7 +- .../amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 3 +- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 106 +--- drivers/gpu/drm/amd/display/dc/dc.h| 5 +-
RE: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs
[AMD Official Use Only - General] I was originally using the MES scheduler version in Thunk to check whether to use userptr or non-paged memory for the queue allocation. Just today though I've changed this to always use non-paged so this isn't needed anymore. Maybe instead we can think about adding this to debugfs, I agree. Thanks, Graham -Original Message- From: Kuehling, Felix Sent: Wednesday, June 8, 2022 3:31 PM To: Sider, Graham ; amd-gfx@lists.freedesktop.org Cc: Yang, Philip ; Joshi, Mukul Subject: Re: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs On 2022-06-07 18:50, Graham Sider wrote: > Make MES/RS64 CP enablement and MES scheduler/MES KIQ versions > available through sysfs. > > Signed-off-by: Graham Sider Why do we need to expose this to user mode applications? They don't interact directly with the scheduler or the KIQ. I cannot think of any reasonable conditions in usermode that would need the scheduler or KIQ version number. It may make sense in debugfs, but not here. Regards, Felix > --- > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > index 02efae4f5549..51c8a285baaf 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > @@ -571,6 +571,14 @@ static ssize_t node_show(struct kobject *kobj, struct > attribute *attr, > dev->gpu->sdma_fw_version); > sysfs_show_64bit_prop(buffer, offs, "unique_id", > dev->gpu->adev->unique_id); > + sysfs_show_32bit_prop(buffer, offs, "mes_enabled", > + (uint32_t)dev->gpu->adev->enable_mes); > + sysfs_show_32bit_prop(buffer, offs, "rs64_cp_enabled", > + > (uint32_t)dev->gpu->adev->gfx.rs64_enable); > + sysfs_show_32bit_prop(buffer, offs, "mes_sched_version", > + dev->gpu->adev->mes.sched_version); > + sysfs_show_32bit_prop(buffer, offs, "mes_kiq_version", > + dev->gpu->adev->mes.kiq_version); > > } >
RE: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero
[AMD Official Use Only - General] My response is inline. Regards, Ramesh -Original Message- From: Kuehling, Felix Sent: Thursday, June 9, 2022 1:10 AM To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh Subject: Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero On 2022-06-08 07:51, Ramesh Errabolu wrote: > In existing code MMIO and DOORBELL BOs are unpinned without ensuring > the condition that their map count has reached zero. Unpinning without > checking this constraint could lead to an error while BO is being > freed. The patch fixes this issue. > > Signed-off-by: Ramesh Errabolu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++ > 1 file changed, 7 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index a1de900ba677..e5dc94b745b1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > > mutex_lock(>lock); > > - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ > - if (mem->alloc_flags & > - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | > - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { > - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); > - } > - > mapped_to_gpu_memory = mem->mapped_to_gpu_memory; > is_imported = mem->is_imported; > mutex_unlock(>lock); > @@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > /* Make sure restore workers don't access the BO any more */ > bo_list_entry = >validate_list; > mutex_lock(_info->lock); > - list_del(_list_entry->head); > + list_del_init(_list_entry->head); Is this an unrelated fix? What is this needed for? I vaguely remember discussing this before, but can't remember the reason. Ramesh: This fix is unrelated to P2P work. I brought this issue to attention while working on IOMMU support on DKMS branch. 
Basically a user could call free() before the map count goes to zero. The patch is trying fix that. Regards, Felix > mutex_unlock(_info->lock); > > /* No more MMU notifiers */ > @@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > > ret = unreserve_bo_and_vms(, false, false); > > + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ > + if (mem->alloc_flags & > + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | > + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) > + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); > + > /* Free the sync object */ > amdgpu_sync_free(>sync); >
Re: [PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device
On 2022-06-08 07:51, Ramesh Errabolu wrote: The field is redundant and does not serve any functional role Signed-off-by: Ramesh Errabolu Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 - 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cbfb32b3d235..a5409531a2fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props->rec_transfer_size = iolink->recommended_transfer_size; - dev->io_link_count++; dev->node_props.io_links_count++; list_add_tail(>list, >io_link_props); break; @@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; - to_dev->io_link_count++; to_dev->node_props.io_links_count++; list_add_tail(>list, _dev->io_link_props); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3e240b22ec91..304322ac39e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int proximity_domain) */ if (iolink->node_to == proximity_domain) { list_del(>list); - dev->io_link_count--; dev->node_props.io_links_count--; } else { if (iolink->node_from > proximity_domain) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2fb5664e1041..9f6c949186c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -130,7 +130,6 @@ struct kfd_topology_device { struct list_headmem_props; uint32_tcache_count; struct list_headcache_props; - uint32_tio_link_count; struct 
list_head io_link_props; struct list_head p2p_link_props; struct list_head perf_props;
Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero
On 2022-06-08 07:51, Ramesh Errabolu wrote: In existing code MMIO and DOORBELL BOs are unpinned without ensuring the condition that their map count has reached zero. Unpinning without checking this constraint could lead to an error while BO is being freed. The patch fixes this issue. Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a1de900ba677..e5dc94b745b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(>lock); - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ - if (mem->alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | -KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); - } - mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(>lock); @@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Make sure restore workers don't access the BO any more */ bo_list_entry = >validate_list; mutex_lock(_info->lock); - list_del(_list_entry->head); + list_del_init(_list_entry->head); Is this an unrelated fix? What is this needed for? I vaguely remember discussing this before, but can't remember the reason. Regards, Felix mutex_unlock(_info->lock); /* No more MMU notifiers */ @@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ret = unreserve_bo_and_vms(, false, false); + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | +KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + /* Free the sync object */ amdgpu_sync_free(>sync);
Re: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs
On 2022-06-07 18:50, Graham Sider wrote: Make MES/RS64 CP enablement and MES scheduler/MES KIQ versions available through sysfs. Signed-off-by: Graham Sider Why do we need to expose this to user mode applications? They don't interact directly with the scheduler or the KIQ. I cannot think of any reasonable conditions in usermode that would need the scheduler or KIQ version number. It may make sense in debugfs, but not here. Regards, Felix --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 02efae4f5549..51c8a285baaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -571,6 +571,14 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->gpu->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", dev->gpu->adev->unique_id); + sysfs_show_32bit_prop(buffer, offs, "mes_enabled", + (uint32_t)dev->gpu->adev->enable_mes); + sysfs_show_32bit_prop(buffer, offs, "rs64_cp_enabled", + (uint32_t)dev->gpu->adev->gfx.rs64_enable); + sysfs_show_32bit_prop(buffer, offs, "mes_sched_version", + dev->gpu->adev->mes.sched_version); + sysfs_show_32bit_prop(buffer, offs, "mes_kiq_version", + dev->gpu->adev->mes.kiq_version); }
RE: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max frame-average luminance
[AMD Official Use Only - General] Hi Aaron, Yes, the panel brightness should reach max after we send max-fall value as backlight current peak. Thanks, Roman > -Original Message- > From: Aaron Ma > Sent: Wednesday, June 8, 2022 6:02 AM > To: Mahfooz, Hamza ; Li, Roman > > Cc: Pillai, Aurabindo ; Lakha, Bhawanpreet > ; Wentland, Harry > ; Siqueira, Rodrigo > ; Li, Sun peng (Leo) ; > Gutierrez, Agustin ; amd- > g...@lists.freedesktop.org; Zuo, Jerry ; Kotarac, Pavle > ; Zhuo, Qingqing (Lillian) > ; Chiu, Solomon ; > Wang, Chao-kai (Stylon) ; Lin, Wayne > > Subject: Re: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max > frame-average luminance > > Hi Roman: > > Can the panel achieve the max peak luminance if it is limited in frame-average > luminance? > > Regards, > Aaron
Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
On 2022-06-08 13:47, Leo wrote: On 2022-06-08 13:25, Pillai, Aurabindo wrote: [AMD Official Use Only - General] Isnt it cleaner to revert the original patch which removed the temporary variable and then instead allocate the clock_limits array on heap, and later freed at the end of the function? -- I don't quite recall the details, but in FPU context, we should not malloc since it can fault/sleep. More info here: https://yarchive.net/comp/linux/kernel_fp.html It looks like we dont have access to bw_ctx and hence the VBA struct inside the callback to update bounding box. If we can add that, then we could replicate what we did within DML functions like dml32_ModeSupportAndSystemConfigurationFull() to reduce the stack usage. See the struct define: struct dml32_ModeSupportAndSystemConfigurationFull { unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX]; double dummy_double_array[2][DC__NUM_DPP__MAX]; DmlPipe SurfParameters[DC__NUM_DPP__MAX]; double dummy_single[5]; double dummy_single2[5]; SOCParametersList mSOCParameters; unsigned int MaximumSwathWidthSupportLuma; unsigned int MaximumSwathWidthSupportChroma; }; struct dummy_vars { struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; }; We define the dummy vars needed in the format of function_name.variable_name. dummy_vars is added to vba struct and this can be access from bw_ctx. Or we could also add a similar dummy struct to the soc resources (dcn2_1_soc etc) and add the temporary variable there. This way, we have a consistent solution for reducing stack usage. 
- Leo Regards, Jay -- *From:* Li, Sun peng (Leo) *Sent:* Wednesday, June 8, 2022 12:48 PM *To:* amd-gfx@lists.freedesktop.org *Cc:* Wentland, Harry ; Pillai, Aurabindo ; Siqueira, Rodrigo ; Li, Sun peng (Leo) *Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update From: Leo Li [Why] There is a theoretical problem in prior patches for reducing the stack size of *update_bw_bounding_box() functions. By modifying the soc.clock_limits[n] struct directly, this can cause unintended behavior as the for loop attempts to swap rows in clock_limits[n]. A temporary struct is still required to make sure we stay functinoally equivalent. [How] Add a temporary clock_limits table to the SOC struct, and use it when swapping rows. Signed-off-by: Leo Li --- .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 36 ++- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ .../amd/display/dc/dml/display_mode_structs.h | 5 ++ 4 files changed, 82 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index c2fec0d85da4..e247b2270b1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; - } + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, + sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1)
Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
On 2022-06-08 12:48, sunpeng...@amd.com wrote: > From: Leo Li > > [Why] > > There is a theoretical problem in prior patches for reducing the stack > size of *update_bw_bounding_box() functions. > > By modifying the soc.clock_limits[n] struct directly, this can cause > unintended behavior as the for loop attempts to swap rows in > clock_limits[n]. A temporary struct is still required to make sure we > stay functinoally equivalent. > > [How] > > Add a temporary clock_limits table to the SOC struct, and use it when > swapping rows. > > Signed-off-by: Leo Li > --- > .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- > .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++- > .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ > .../amd/display/dc/dml/display_mode_structs.h | 5 ++ > 4 files changed, 82 insertions(+), 56 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > index c2fec0d85da4..e247b2270b1d 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct > clk_bw_params *bw_params > > ASSERT(clk_table->num_entries); > /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over > null states later */ > - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { > - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; > - } Hmm, this for loop didn't make sense. I gave my RB for the previous patch too quickly. 
> + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, > +sizeof(dcn2_1_soc.clock_limits)); > > for (i = 0; i < clk_table->num_entries; i++) { > /* loop backwards*/ > @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, > struct clk_bw_params *bw_params > if (i == 1) > k++; > > - dcn2_1_soc.clock_limits[k].state = k; > - dcn2_1_soc.clock_limits[k].dcfclk_mhz = > clk_table->entries[i].dcfclk_mhz; > - dcn2_1_soc.clock_limits[k].fabricclk_mhz = > clk_table->entries[i].fclk_mhz; > - dcn2_1_soc.clock_limits[k].socclk_mhz = > clk_table->entries[i].socclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_speed_mts = > clk_table->entries[i].memclk_mhz * 2; > + dcn2_1_soc._clock_tmp[k].state = k; > + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = > clk_table->entries[i].dcfclk_mhz; > + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = > clk_table->entries[i].fclk_mhz; > + dcn2_1_soc._clock_tmp[k].socclk_mhz = > clk_table->entries[i].socclk_mhz; > + dcn2_1_soc._clock_tmp[k].dram_speed_mts = > clk_table->entries[i].memclk_mhz * 2; > > - dcn2_1_soc.clock_limits[k].dispclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > - dcn2_1_soc.clock_limits[k].dppclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = > dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > - dcn2_1_soc.clock_limits[k].dscclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > - dcn2_1_soc.clock_limits[k].dtbclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > - dcn2_1_soc.clock_limits[k].phyclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > + dcn2_1_soc._clock_tmp[k].dispclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > + dcn2_1_soc._clock_tmp[k].dppclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > + dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps 
= > dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > + dcn2_1_soc._clock_tmp[k].dscclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > + dcn2_1_soc._clock_tmp[k].dtbclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > + dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > + dcn2_1_soc._clock_tmp[k].phyclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > I see why we need a tmp array and agree that we shouldn't allocate it inside DML functions. Reviewed-by: Harry Wentland Harry > k++; > } > + > + memcpy(_1_soc.clock_limits, _1_soc._clock_tmp, > +sizeof(dcn2_1_soc.clock_limits)); > + > if (clk_table->num_entries) { > dcn2_1_soc.num_states = clk_table->num_entries + 1; > /* fill in min DF PState */ > diff --git
Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
On 2022-06-08 13:47, Leo wrote: On 2022-06-08 13:25, Pillai, Aurabindo wrote: [AMD Official Use Only - General] Isnt it cleaner to revert the original patch which removed the temporary variable and then instead allocate the clock_limits array on heap, and later freed at the end of the function? -- I don't quite recall the details, but in FPU context, we should not malloc since it can fault/sleep. More info here: https://yarchive.net/comp/linux/kernel_fp.html Yeah... we cannot use malloc inside DML. I also think that we can just apply this patch on top of amd-staging-drm-next without reverting the old patches because we are unaware of any regression caused for those patches, afaik. Thanks Siqueira - Leo Regards, Jay -- *From:* Li, Sun peng (Leo) *Sent:* Wednesday, June 8, 2022 12:48 PM *To:* amd-gfx@lists.freedesktop.org *Cc:* Wentland, Harry ; Pillai, Aurabindo ; Siqueira, Rodrigo ; Li, Sun peng (Leo) *Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update From: Leo Li [Why] There is a theoretical problem in prior patches for reducing the stack size of *update_bw_bounding_box() functions. By modifying the soc.clock_limits[n] struct directly, this can cause unintended behavior as the for loop attempts to swap rows in clock_limits[n]. A temporary struct is still required to make sure we stay functinoally equivalent. [How] Add a temporary clock_limits table to the SOC struct, and use it when swapping rows. 
Signed-off-by: Leo Li --- .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 36 ++- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ .../amd/display/dc/dml/display_mode_structs.h | 5 ++ 4 files changed, 82 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index c2fec0d85da4..e247b2270b1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; - } + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, + sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1) k++; - dcn2_1_soc.clock_limits[k].state = k; - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + dcn2_1_soc._clock_tmp[k].state = k; + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - 
dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; -
Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
On 2022-06-08 13:25, Pillai, Aurabindo wrote: > [AMD Official Use Only - General] > > Isn't it cleaner to revert the original patch which removed the temporary > variable and then instead allocate the clock_limits array on heap, and later > free it at the end of the function? > > -- I don't quite recall the details, but in FPU context, we should not malloc since it can fault/sleep. More info here: https://yarchive.net/comp/linux/kernel_fp.html - Leo > > Regards, > Jay > -- > *From:* Li, Sun peng (Leo) > *Sent:* Wednesday, June 8, 2022 12:48 PM > *To:* amd-gfx@lists.freedesktop.org > *Cc:* Wentland, Harry ; Pillai, Aurabindo > ; Siqueira, Rodrigo ; Li, > Sun peng (Leo) > *Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for > bounding box update > > From: Leo Li > > [Why] > > There is a theoretical problem in prior patches for reducing the stack > size of *update_bw_bounding_box() functions. > > By modifying the soc.clock_limits[n] struct directly, this can cause > unintended behavior as the for loop attempts to swap rows in > clock_limits[n]. A temporary struct is still required to make sure we > stay functionally equivalent. > > [How] > > Add a temporary clock_limits table to the SOC struct, and use it when > swapping rows. 
> > Signed-off-by: Leo Li > --- > .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- > .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 36 ++- > .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ > .../amd/display/dc/dml/display_mode_structs.h | 5 ++ > 4 files changed, 82 insertions(+), 56 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > index c2fec0d85da4..e247b2270b1d 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct > clk_bw_params *bw_params > > ASSERT(clk_table->num_entries); > /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying > over null states later */ > - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { > - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; > - } > + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, > + sizeof(dcn2_1_soc.clock_limits)); > > for (i = 0; i < clk_table->num_entries; i++) { > /* loop backwards*/ > @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, > struct clk_bw_params *bw_params > if (i == 1) > k++; > > - dcn2_1_soc.clock_limits[k].state = k; > - dcn2_1_soc.clock_limits[k].dcfclk_mhz = > clk_table->entries[i].dcfclk_mhz; > - dcn2_1_soc.clock_limits[k].fabricclk_mhz = > clk_table->entries[i].fclk_mhz; > - dcn2_1_soc.clock_limits[k].socclk_mhz = > clk_table->entries[i].socclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_speed_mts = > clk_table->entries[i].memclk_mhz * 2; > + dcn2_1_soc._clock_tmp[k].state = k; > + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = > clk_table->entries[i].dcfclk_mhz; > + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = > clk_table->entries[i].fclk_mhz; > + dcn2_1_soc._clock_tmp[k].socclk_mhz = > clk_table->entries[i].socclk_mhz; > + dcn2_1_soc._clock_tmp[k].dram_speed_mts = > clk_table->entries[i].memclk_mhz * 2; > > - 
dcn2_1_soc.clock_limits[k].dispclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > - dcn2_1_soc.clock_limits[k].dppclk_mhz = > dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = > dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > -
Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
[AMD Official Use Only - General] Isnt it cleaner to revert the original patch which removed the temporary variable and then instead allocate the clock_limits array on heap, and later freed at the end of the function? -- Regards, Jay From: Li, Sun peng (Leo) Sent: Wednesday, June 8, 2022 12:48 PM To: amd-gfx@lists.freedesktop.org Cc: Wentland, Harry ; Pillai, Aurabindo ; Siqueira, Rodrigo ; Li, Sun peng (Leo) Subject: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update From: Leo Li [Why] There is a theoretical problem in prior patches for reducing the stack size of *update_bw_bounding_box() functions. By modifying the soc.clock_limits[n] struct directly, this can cause unintended behavior as the for loop attempts to swap rows in clock_limits[n]. A temporary struct is still required to make sure we stay functinoally equivalent. [How] Add a temporary clock_limits table to the SOC struct, and use it when swapping rows. Signed-off-by: Leo Li --- .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ .../amd/display/dc/dml/display_mode_structs.h | 5 ++ 4 files changed, 82 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index c2fec0d85da4..e247b2270b1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; - } + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, + sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* 
loop backwards*/ @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1) k++; - dcn2_1_soc.clock_limits[k].state = k; - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + dcn2_1_soc._clock_tmp[k].state = k; + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + dcn2_1_soc._clock_tmp[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; k++; } + + memcpy(_1_soc.clock_limits, _1_soc._clock_tmp, + sizeof(dcn2_1_soc.clock_limits)); + if (clk_table->num_entries) {
[PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update
From: Leo Li [Why] There is a theoretical problem in prior patches for reducing the stack size of *update_bw_bounding_box() functions. By modifying the soc.clock_limits[n] struct directly, this can cause unintended behavior as the for loop attempts to swap rows in clock_limits[n]. A temporary struct is still required to make sure we stay functinoally equivalent. [How] Add a temporary clock_limits table to the SOC struct, and use it when swapping rows. Signed-off-by: Leo Li --- .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +- .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++ .../amd/display/dc/dml/display_mode_structs.h | 5 ++ 4 files changed, 82 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index c2fec0d85da4..e247b2270b1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; - } + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits, + sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1) k++; - dcn2_1_soc.clock_limits[k].state = k; - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + 
dcn2_1_soc._clock_tmp[k].state = k; + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + dcn2_1_soc._clock_tmp[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; k++; } + + memcpy(_1_soc.clock_limits, _1_soc._clock_tmp, + sizeof(dcn2_1_soc.clock_limits)); + if (clk_table->num_entries) { dcn2_1_soc.num_states = clk_table->num_entries + 1; /* fill in min DF PState */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 62cf283d9f41..e4863f0bf0f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -254,6 +254,9 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dc_assert_fp_enabled(); +
Re: [RFC PATCH v2 00/27] DRM.debug on DYNAMIC_DEBUG, add trace events
On Mon, Jun 06, 2022 at 08:59:36AM -0600, jim.cro...@gmail.com wrote: > On Wed, May 25, 2022 at 9:02 AM Daniel Vetter wrote: > > > On Mon, May 16, 2022 at 04:56:13PM -0600, Jim Cromie wrote: > > > DRM.debug API is 23 macros, issuing 10 exclusive categories of debug > > > messages. By rough count, they are used 5140 times in the kernel. > > > These all call drm_dbg or drm_devdbg, which call drm_debug_enabled(), > > > which checks bits in global __drm_debug. Some of these are page-flips > > > and vblanks, and get called often. > > > > > > DYNAMIC_DEBUG (with CONFIG_JUMP_LABEL) is built to avoid this kind of > > > work, with NOOPd jump/callsites. > > > > > > This patchset is RFC because: > > > - it touches 2.5 subsystems: dyndbg, drm, tracefs (new events) > > > - dyndbg class support is built for drm, needs it for validation > > > - new api, used by drm > > > - big memory impact, with 5100 new pr-debug callsites. > > > - drm class bikeshedding opportunities > > > - others, names etc. > > > > Thanks a lot for keeping on pushing this! > > > > > > > > DYNAMIC_DEBUG: > > > > > > > > > RFC: > > > > > > dynamic_debug_register_classes() cannot act early enough to be in > > > effect at module-load. So this will not work as you'd reasonably > > > expect: > > > > > > modprobe test_dynamic_debug dyndbg='+pfm; class FOO +pfmlt' > > > > > > The 1st query:+pfm will be enabled during load, but in the 2nd query, > > > "class FOO" will be unknown at load time. Early class enablement > > > would be nice. DYNAMIC_DEBUG_CLASSES is a static initializer, which > > > is certainly early enough, but Im missing a trick, suggestions? > > > > So maybe I'm just totally overloading this work here so feel free to > > ignore or postpone, but: Could we do the dynamic_debug_register_classes() > > automatically at module load as a new special section? 
And then throw in a > > bit of kbuild so that in a given subsystem every driver gets the same > > class names by default and everything would just work, without having to > > sprinkle calls to dynamic_debug_register_classes() all over the place? > > > > This is now done; Ive added __dyndbg_classes section. > load_module() now grabs it from the .ko > and ddebug_add_module() attaches it to the module's ddebug_table record. > for builtins, dynamic_debug_init feeds the builtin class-maps to > ddebug_add_module > > bash-5.1# modprobe test_dynamic_debug dyndbg="class FOO +p" > [ 88.374722] dyndbg: class[0]: nm:test_dynamic_debug base:20 len:7 ty:1 > [ 88.375158] dyndbg: 0: EMERG > [ 88.375345] dyndbg: 1: DANGER > [ 88.375540] dyndbg: 2: ERROR > [ 88.375726] dyndbg: 3: WARNING > [ 88.375930] dyndbg: 4: NOTICE > [ 88.376130] dyndbg: 5: INFO > [ 88.376310] dyndbg: 6: DEBUG > [ 88.376499] dyndbg: class[1]: nm:test_dynamic_debug base:12 len:3 ty:1 > [ 88.376903] dyndbg: 0: ONE > [ 88.377079] dyndbg: 1: TWO > [ 88.377253] dyndbg: 2: THREE > [ 88.377441] dyndbg: class[2]: nm:test_dynamic_debug base:8 len:3 ty:0 > [ 88.377837] dyndbg: 0: bing > [ 88.378022] dyndbg: 1: bong > [ 88.378203] dyndbg: 2: boom > [ 88.378387] dyndbg: class[3]: nm:test_dynamic_debug base:4 len:3 ty:0 > [ 88.378800] dyndbg: 0: Foo > [ 88.378986] dyndbg: 1: Bar > [ 88.379167] dyndbg: 2: Buzz > [ 88.379348] dyndbg: class[4]: nm:test_dynamic_debug base:0 len:3 ty:0 > [ 88.379757] dyndbg: 0: FOO > [ 88.379938] dyndbg: 1: BAR > [ 88.380136] dyndbg: 2: BUZZ > [ 88.380410] dyndbg: module:test_dynamic_debug attached 5 classes > [ 88.380881] dyndbg: 24 debug prints in module test_dynamic_debug > [ 88.381315] dyndbg: module: test_dynamic_debug dyndbg="class FOO +p" > [ 88.381714] dyndbg: query 0: "class FOO +p" mod:test_dynamic_debug > [ 88.382109] dyndbg: split into words: "class" "FOO" "+p" > [ 88.382445] dyndbg: op='+' > [ 88.382616] dyndbg: flags=0x1 > [ 88.382802] dyndbg: *flagsp=0x1 *maskp=0x > [ 88.383101] 
dyndbg: parsed: func="" file="" module="test_dynamic_debug" > format="" lineno=0-0 class=FOO > [ 88.383740] dyndbg: applied: func="" file="" module="test_dynamic_debug" > format="" lineno=0-0 class=FOO > [ 88.384324] dyndbg: processed 1 queries, with 2 matches, 0 errs > bash-5.1# > > so its working at module-load time. Awesome! > > For the entire class approach, did you spot another subsystem that could > > benefit from this and maybe make a more solid case that this is something > > good? > > > > I had been working on the premise that ~4k drm*dbg callsites was a good > case. Oh I'm happy with just drm, but occasionally we've done stuff in drm that the wider kernel community found a bit silly. So bit more acks/validation from outside the dri-devel echo chamber would be great, whatever form it is. > verbosity-levels - with x missing. > > the next revision adds something, which "kinda works". > But I think I'll rip it out, and do this simpler approach instead: > > implement a verbose/levels param & callback, which
Re: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max frame-average luminance
Hi Roman: Can the panel achieve the max peak luminance if it is limited in frame-average luminance? Regards, Aaron
Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3
I'll push this with the TLB fix. Alex On Wed, Jun 8, 2022 at 9:47 AM Christian König wrote: > > I need to look into this more deeply when I'm back from sick leave. > > Till then this workaround should be sufficient since VCN3 is the only > callback which tries to adjust the instance. > > Regards, > Christian. > > Am 07.06.22 um 22:22 schrieb Alex Deucher: > > We'll need to implement the parse callbacks for vcn4 as well if we > > haven't already. > > > > Alex > > > > On Tue, Jun 7, 2022 at 4:20 PM Dong, Ruijing wrote: > >> [AMD Official Use Only - General] > >> > >> I can see for VCN4, AV1 dec/enc also need to limit to the first instance. > >> > >> Thanks, > >> Ruijing > >> > >> -Original Message- > >> From: amd-gfx On Behalf Of Alex > >> Deucher > >> Sent: Friday, June 3, 2022 10:12 AM > >> To: Christian König > >> Cc: Pelloux-Prayer, Pierre-Eric ; > >> amd-gfx list > >> Subject: Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on > >> VCN3 > >> > >> Do the other uvd/vce/vcn ring parse functions need a similar fix? > >> > >> Alex > >> > >> > >> On Fri, Jun 3, 2022 at 10:08 AM Alex Deucher wrote: > >>> On Fri, Jun 3, 2022 at 8:10 AM Christian König > >>> wrote: > Am 03.06.22 um 14:08 schrieb Pierre-Eric Pelloux-Prayer: > > Hi Christian, > > > > The patch is: Tested-by: Pierre-Eric Pelloux-Prayer > > > > > > Could you add a reference to > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F2037data=05%7C01%7CRuijing.Dong%40amd.com%7C5ba73dfe91ba47e21dd608da456b0609%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637898623221806051%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=WgIZD299Xe0XVG%2Ftb2rn14njS%2FgHIhtIHIDAZ2Fj40k%3Dreserved=0 > > in the commit message? > Sure, can you also give me an rb or acked-by so that I can push it? > >>> Reviewed-by: Alex Deucher > >>> > Thanks, > Christian. 
> > > Thanks, > > Pierre-Eric > > > > On 03/06/2022 12:21, Christian König wrote: > >> The job is not yet initialized here. > >> > >> Signed-off-by: Christian König > >> Fixes: 1027d5d791b7 ("drm/amdgpu: use job and ib structures > >> directly in CS parsers") > >> --- > >>drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 +++-- > >>1 file changed, 7 insertions(+), 10 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > >> b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > >> index 3cabceee5f57..39405f0db824 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > >> @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs > >> vcn_v3_0_dec_sw_ring_vm_funcs = { > >> .emit_reg_write_reg_wait = > >> amdgpu_ring_emit_reg_write_reg_wait_helper, > >>}; > >> > >> -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, > >> -struct amdgpu_job *job) > >> +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) > >>{ > >> struct drm_gpu_scheduler **scheds; > >> > >> /* The create msg must be in the first IB submitted */ > >> -if (atomic_read(>base.entity->fence_seq)) > >> +if (atomic_read(>entity->fence_seq)) > >> return -EINVAL; > >> > >> scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] > >> [AMDGPU_RING_PRIO_DEFAULT].sched; > >> -drm_sched_entity_modify_sched(job->base.entity, scheds, 1); > >> +drm_sched_entity_modify_sched(p->entity, scheds, 1); > >> return 0; > >>} > >> > >> -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct > >> amdgpu_job *job, > >> -uint64_t addr) > >> +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t > >> +addr) > >>{ > >> struct ttm_operation_ctx ctx = { false, false }; > >> struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@ > >> static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct > >> amdgpu_job *job, > >> if (create[0] == 0x7 || create[0] == 0x10 || create[0] > >> == 0x11) > >> continue; > >> > >> -r = vcn_v3_0_limit_sched(p, job); > 
>> +r = vcn_v3_0_limit_sched(p); > >> if (r) > >> goto out; > >> } > >> @@ -1862,7 +1860,7 @@ static int > >> vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, > >> struct amdgpu_job *job, > >> struct amdgpu_ib *ib) > >>{ > >> -struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); > >> +
Re: [PATCH] drm/amdgpu: always flush the TLB on gfx8
yes. will do. Alex On Wed, Jun 8, 2022 at 9:58 AM Christian König wrote: > > Am 07.06.22 um 22:27 schrieb Alex Deucher: > > On Fri, Jun 3, 2022 at 9:05 AM Christian König > > wrote: > >> The TLB on GFX8 stores each block of 8 PTEs where any of the valid bits > >> are set. > >> > >> Signed-off-by: Christian König > > Reviewed-by: Alex Deucher > > Alex could you push this? It's an important fix, but I'm seriously not > feeling well at the moment. > > Thanks, > Christian. > > > > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 + > >> 1 file changed, 5 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> index 9596c22fded6..b747488c28ad 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> @@ -847,6 +847,11 @@ int amdgpu_vm_update_range(struct amdgpu_device > >> *adev, struct amdgpu_vm *vm, > >> flush_tlb |= adev->gmc.xgmi.num_physical_nodes && > >> adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); > >> > >> + /* > >> +* On GFX8 and older any 8 PTE block with a valid bit set enters > >> the TLB > >> +*/ > >> + flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); > >> + > >> memset(, 0, sizeof(params)); > >> params.adev = adev; > >> params.vm = vm; > >> -- > >> 2.25.1 > >> >
Re: [PATCH] drm/amdgpu: always flush the TLB on gfx8
Am 07.06.22 um 22:27 schrieb Alex Deucher: On Fri, Jun 3, 2022 at 9:05 AM Christian König wrote: The TLB on GFX8 stores each block of 8 PTEs where any of the valid bits are set. Signed-off-by: Christian König Reviewed-by: Alex Deucher Alex could you push this? It's an important fix, but I'm seriously not feeling well at the moment. Thanks, Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9596c22fded6..b747488c28ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -847,6 +847,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, flush_tlb |= adev->gmc.xgmi.num_physical_nodes && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + /* +* On GFX8 and older any 8 PTE block with a valid bit set enters the TLB +*/ + flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); + memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; -- 2.25.1
Re: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues
On 2022-06-07 13:58, Aurabindo Pillai wrote: > > > On 2022-06-07 11:34, Leo wrote: >> >> >> On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote: >>> [Why] >>> Getting below errors: >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5: >>> error: implicit conversion from enumeration type 'enum >>> scan_direction_class' to different enumeration type 'enum >>> dm_rotation_angle' [-Werror,-Wenum-conversion] >>> mode_lib->vba.SourceScan[k], >>> ^~~ >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22: >>> error: implicit conversion from enumeration type 'enum >>> scan_direction_class' to different enumeration type 'enum >>> dm_rotation_angle' [-Werror,-Wenum-conversion] >>> && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || >>> mode_lib->vba.DCCEnable[k] == true)) { >>> ~~ ^~~ >>> 2 errors generated. >>> >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18: >>> error: implicit conversion from enumeration type 'RequestType' to >>> different enumeration type 'enum RequestType' [-Werror,-Wenum-conversion] >>> RequestLuma = REQ_256Bytes; >>> ~ ^~~~ >>> 18 errors of similar kind >>> >>> [How] >>> 1. Add typecast at relevant places >>> 2. 
Move the enum RequestType definition ahead of declarations >>> >>> Signed-off-by: Chandan Vurdigere Nataraj >>> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> index b77a1ae792d1..5828e60f291d 100644 >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> @@ -1411,7 +1411,7 @@ static void >>> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman >>> v->BytePerPixelC[k], >>> v->BytePerPixelDETY[k], >>> v->BytePerPixelDETC[k], >>> - mode_lib->vba.SourceScan[k], >>> + (enum dm_rotation_angle) mode_lib->vba.SourceScan[k], >> >> Hi Jay, >> >> This seems fishy, dm_rotation_angle and scan_dirrection_class are very >> different enums. >> Comparing dml32_CalculateDCCConfiguration() with >> CalculateDCCConfiguration(), it seems dm_rotation_angle is new for DCN32. Is >> passing vba.SourceScan correct here? > > I agree. It should be typecast to scan_direction_class >From the build error, it looks like dml32_CalculateDCCConfiguration() wants >type `enum dm_rotation_amgle`, but vba.SourceScan[n] is of type `enum >scan_direction_class`. The error is complaining that we're implicitly casting >from scan_direction_class to dm_rotation_angle. In otherwords, this was an >issue prior to Chandan's fix. I think we'll need to address this separately, but for now this fixes a build on chromeos with clang 14 that I've been able to reproduce. Elevating my ack to: Reviewed-by: Leo Li Thanks! Leo >> >> One more comment below. 
>> >>> /* Output */ >>> >DCCYMaxUncompressedBlock[k], >>> >DCCCMaxUncompressedBlock[k], >>> @@ -1741,7 +1741,8 @@ void >>> dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l >>> mode_lib->vba.SourceFormatPixelAndScanSupport = true; >>> for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { >>> if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear >>> - && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || >>> mode_lib->vba.DCCEnable[k] == true)) { >>> + && (!(!IsVertical((enum dm_rotation_angle) >>> mode_lib->vba.SourceScan[k])) >>> + || mode_lib->vba.DCCEnable[k] == true)) { >>> mode_lib->vba.SourceFormatPixelAndScanSupport = false; >>> } >>> } >>> diff --git >>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> index 6509a84eeb64..07f3a85f8edf 100644 >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> @@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration( >>> unsigned int *IndependentBlockLuma, >>> unsigned int *IndependentBlockChroma) >>> { >>> + typedef enum { >>> + REQ_256Bytes, >>> + REQ_128BytesNonContiguous, >>> + REQ_128BytesContiguous, >>> + REQ_NA >>> + } RequestType; >>> - enum RequestType RequestLuma; >>> - enum RequestType RequestChroma; >>> + RequestType RequestLuma; >>> + RequestType RequestChroma; >> >> This might need a wider
Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3
I need to look into this more deeply when I'm back from sick leave. Till then this workaround should be sufficient since VCN3 is the only callback which tries to adjust the instance. Regards, Christian. Am 07.06.22 um 22:22 schrieb Alex Deucher: We'll need to implement the parse callbacks for vcn4 as well if we haven't already. Alex On Tue, Jun 7, 2022 at 4:20 PM Dong, Ruijing wrote: [AMD Official Use Only - General] I can see for VCN4, AV1 dec/enc also need to limit to the first instance. Thanks, Ruijing -Original Message- From: amd-gfx On Behalf Of Alex Deucher Sent: Friday, June 3, 2022 10:12 AM To: Christian König Cc: Pelloux-Prayer, Pierre-Eric ; amd-gfx list Subject: Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3 Do the other uvd/vce/vcn ring parse functions need a similar fix? Alex On Fri, Jun 3, 2022 at 10:08 AM Alex Deucher wrote: On Fri, Jun 3, 2022 at 8:10 AM Christian König wrote: Am 03.06.22 um 14:08 schrieb Pierre-Eric Pelloux-Prayer: Hi Christian, The patch is: Tested-by: Pierre-Eric Pelloux-Prayer Could you add a reference to https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F2037data=05%7C01%7CRuijing.Dong%40amd.com%7C5ba73dfe91ba47e21dd608da456b0609%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637898623221806051%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=WgIZD299Xe0XVG%2Ftb2rn14njS%2FgHIhtIHIDAZ2Fj40k%3Dreserved=0 in the commit message? Sure, can you also give me an rb or acked-by so that I can push it? Reviewed-by: Alex Deucher Thanks, Christian. Thanks, Pierre-Eric On 03/06/2022 12:21, Christian König wrote: The job is not yet initialized here. 
Signed-off-by: Christian König Fixes: 1027d5d791b7 ("drm/amdgpu: use job and ib structures directly in CS parsers") --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 +++-- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3cabceee5f57..39405f0db824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, -struct amdgpu_job *job) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ -if (atomic_read(>base.entity->fence_seq)) +if (atomic_read(>entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; -drm_sched_entity_modify_sched(job->base.entity, scheds, 1); +drm_sched_entity_modify_sched(p->entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, -uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t +addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; -r = vcn_v3_0_limit_sched(p, job); +r = vcn_v3_0_limit_sched(p); if (r) goto out; } @@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { -struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); +struct amdgpu_ring *ring = + to_amdgpu_ring(p->entity->rq->sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ 
-1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { -r = vcn_v3_0_dec_msg(p, job, - ((u64)msg_hi) << 32 | msg_lo); +r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 + | msg_lo); if (r) return r; }
RE: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues
Hi, >> >> >> On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote: >>> [Why] >>> Getting below errors: >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5: >>> error: implicit conversion from enumeration type 'enum >>> scan_direction_class' to different enumeration type 'enum >>> dm_rotation_angle' [-Werror,-Wenum-conversion] >>> mode_lib->vba.SourceScan[k], >>> ^~~ >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22: >>> error: implicit conversion from enumeration type 'enum >>> scan_direction_class' to different enumeration type 'enum >>> dm_rotation_angle' [-Werror,-Wenum-conversion] >>> && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || >>> mode_lib->vba.DCCEnable[k] == true)) { >>> ~~ >>> ^~~ >>> 2 errors generated. >>> >>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18: >>> error: implicit conversion from enumeration type 'RequestType' to >>> different enumeration type 'enum RequestType' [-Werror,-Wenum-conversion] >>> RequestLuma = REQ_256Bytes; >>> ~ ^~~~ >>> 18 errors of similar kind >>> >>> [How] >>> 1. Add typecast at relevant places >>> 2. 
Move the enum RequestType definition ahead of declarations >>> >>> Signed-off-by: Chandan Vurdigere Nataraj >>> >>> >>> diff --git >>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> index b77a1ae792d1..5828e60f291d 100644 >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c >>> @@ -1411,7 +1411,7 @@ static void >>> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman >>> v->BytePerPixelC[k], >>> v->BytePerPixelDETY[k], >>> v->BytePerPixelDETC[k], >>> - mode_lib->vba.SourceScan[k], >>> + (enum dm_rotation_angle) >>> mode_lib->vba.SourceScan[k], >> >> Hi Jay, >> >> This seems fishy, dm_rotation_angle and scan_dirrection_class are very >> different enums. >> Comparing dml32_CalculateDCCConfiguration() with >> CalculateDCCConfiguration(), it seems dm_rotation_angle is new for DCN32. Is >> passing vba.SourceScan correct here? > >I agree. It should be typecast to scan_direction_class Let me change and re-submit. Thanks for the inputs. >> >> One more comment below. 
>> >>> /* Output */ >>> >DCCYMaxUncompressedBlock[k], >>> >DCCCMaxUncompressedBlock[k], @@ -1741,7 >>> +1741,8 @@ void >>> dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l >>> mode_lib->vba.SourceFormatPixelAndScanSupport = true; >>> for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { >>> if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear >>> - && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || >>> mode_lib->vba.DCCEnable[k] == true)) { >>> + && (!(!IsVertical((enum dm_rotation_angle) >>> mode_lib->vba.SourceScan[k])) >>> + || mode_lib->vba.DCCEnable[k] == true)) { >>> mode_lib->vba.SourceFormatPixelAndScanSupport = false; >>> } >>> } >>> diff --git >>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> index 6509a84eeb64..07f3a85f8edf 100644 >>> --- >>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_ >>> +++ 32.c >>> @@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration( >>> unsigned int*IndependentBlockLuma, >>> unsigned int*IndependentBlockChroma) >>> { >>> + typedef enum { >>> + REQ_256Bytes, >>> + REQ_128BytesNonContiguous, >>> + REQ_128BytesContiguous, >>> + REQ_NA >>> + } RequestType; >>> >>> - enum RequestType RequestLuma; >>> - enum RequestType RequestChroma; >>> + RequestType RequestLuma; >>> + RequestType RequestChroma; >> >> This might need a wider cleanup, enum RequestType is defined in >> display_mode_enums.h and is already included in all the display_mode_vba*.c >> files I've come across. Unless I'm missing something, we shouldn't need to >> redefine RequestType. >> >> That said, there doesn't seem to be any functional change, and it >> fixes a build error. So > >I'm unable to repro this error. Are you using clang? If so, which version ?
Re: [PATCH 3/3] drm/amdkfd: use existing VM helper for PD and PT validation in SVM
Am 2022-06-07 um 22:21 schrieb Lang Yu: On 06/07/ , Felix Kuehling wrote: Am 2022-06-07 um 05:59 schrieb Lang Yu: This will remove some redundant codes. Signed-off-by: Lang Yu The redundancy is quite small, and amdgpu_amdkfd_gpuvm_validate_pt_pd_bos and amdgpu_amdkfd_bo_validate are quite a bit more complex and handle more different cases. Someone changing those functions in the future may not realize the effect that may have on the SVM code. I'd prefer to keep the svm_range_bo_validate function in kfd_svm.c to make the code easier to understand and maintain. If anything, I'd move it closer to where its used, because it's only used in one place. Thanks for your comments. I got it. By the way, is it necessary to update vm->pd_phys_addr here? I noticed that vm->pd_phys_addr is updated in vm_validate_pt_pd_bos()? Thanks! It's not needed here. The only time we need to update this is, when we re-enable queues after a PD eviction. This is handled by the PD validation in amdgpu_amdkfd_gpuvm_restore_process_bos -> process_validate_vms -> vm_validate_pt_pd_bos. 
Regards, Felix Regards, Lang Regards, Felix --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 + 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index d6fc00d51c8c..03e07d1d1d1a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -625,15 +625,6 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) return kfd_process_device_from_gpuidx(p, gpu_idx); } -static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) -{ - struct ttm_operation_ctx ctx = { false, false }; - - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); - - return ttm_bo_validate(>tbo, >placement, ); -} - static int svm_range_check_attr(struct kfd_process *p, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) @@ -1428,9 +1419,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) goto unreserve_out; } - r = amdgpu_vm_validate_pt_bos(pdd->dev->adev, - drm_priv_to_vm(pdd->drm_priv), - svm_range_bo_validate, NULL); + r = amdgpu_amdkfd_gpuvm_validate_pt_pd_bos(drm_priv_to_vm(pdd->drm_priv)); if (r) { pr_debug("failed %d validate pt bos\n", r); goto unreserve_out;
Re: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues
On 2022-06-07 13:58, Aurabindo Pillai wrote: On 2022-06-07 11:34, Leo wrote: On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote: [Why] Getting below errors: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5: error: implicit conversion from enumeration type 'enum scan_direction_class' to different enumeration type 'enum dm_rotation_angle' [-Werror,-Wenum-conversion] mode_lib->vba.SourceScan[k], ^~~ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22: error: implicit conversion from enumeration type 'enum scan_direction_class' to different enumeration type 'enum dm_rotation_angle' [-Werror,-Wenum-conversion] && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || mode_lib->vba.DCCEnable[k] == true)) { ~~ ^~~ 2 errors generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18: error: implicit conversion from enumeration type 'RequestType' to different enumeration type 'enum RequestType' [-Werror,-Wenum-conversion] RequestLuma = REQ_256Bytes; ~ ^~~~ 18 errors of similar kind [How] 1. Add typecast at relevant places 2. Move the enum RequestType definition ahead of declarations Signed-off-by: Chandan Vurdigere Nataraj diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index b77a1ae792d1..5828e60f291d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1411,7 +1411,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->BytePerPixelC[k], v->BytePerPixelDETY[k], v->BytePerPixelDETC[k], - mode_lib->vba.SourceScan[k], + (enum dm_rotation_angle) mode_lib->vba.SourceScan[k], Hi Jay, This seems fishy, dm_rotation_angle and scan_dirrection_class are very different enums. 
Comparing dml32_CalculateDCCConfiguration() with CalculateDCCConfiguration(), it seems dm_rotation_angle is new for DCN32. Is passing vba.SourceScan correct here? I agree. It should be typecast to scan_direction_class One more comment below. /* Output */ >DCCYMaxUncompressedBlock[k], >DCCCMaxUncompressedBlock[k], @@ -1741,7 +1741,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SourceFormatPixelAndScanSupport = true; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear - && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || mode_lib->vba.DCCEnable[k] == true)) { + && (!(!IsVertical((enum dm_rotation_angle) mode_lib->vba.SourceScan[k])) + || mode_lib->vba.DCCEnable[k] == true)) { mode_lib->vba.SourceFormatPixelAndScanSupport = false; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 6509a84eeb64..07f3a85f8edf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration( unsigned int *IndependentBlockLuma, unsigned int *IndependentBlockChroma) { + typedef enum { + REQ_256Bytes, + REQ_128BytesNonContiguous, + REQ_128BytesContiguous, + REQ_NA + } RequestType; - enum RequestType RequestLuma; - enum RequestType RequestChroma; + RequestType RequestLuma; + RequestType RequestChroma; This might need a wider cleanup, enum RequestType is defined in display_mode_enums.h and is already included in all the display_mode_vba*.c files I've come across. Unless I'm missing something, we shouldn't need to redefine RequestType. That said, there doesn't seem to be any functional change, and it fixes a build error. So I'm unable to repro this error. Are you using clang? If so, which version ? 
Hey Jay, I was checking this patch in our CI, and we can reproduce this issue with a clang. Thanks Siqueira Acked-by: Leo Li Thanks, Leo unsigned int segment_order_horz_contiguous_luma; unsigned int segment_order_horz_contiguous_chroma; @@ -5350,13 +5356,6 @@ void dml32_CalculateDCCConfiguration( double detile_buf_vp_horz_limit; double detile_buf_vp_vert_limit; - typedef enum { - REQ_256Bytes, - REQ_128BytesNonContiguous,
Re: [PATCH 1/1] drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit
Reviewed-by:JamesZhuforthis patch James On 2022-06-08 1:36 a.m., Mohammad Zafar Ziya wrote: Add jpeg vmid update under IB submit Signed-off-by: Mohammad Zafar Ziya Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index d2722adabd1b..f3c1af5130ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, { unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT)); + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, (vmid | (vmid << 4))); @@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ 8 + 16, - .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */ .emit_ib = jpeg_v2_0_dec_ring_emit_ib, .emit_fence = jpeg_v2_0_dec_ring_emit_fence, .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 1a03baa59755..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -41,6 +41,7 @@ #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR0x18000
[PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device
The field is redundant and does not serve any functional role Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 - 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cbfb32b3d235..a5409531a2fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props->rec_transfer_size = iolink->recommended_transfer_size; - dev->io_link_count++; dev->node_props.io_links_count++; list_add_tail(>list, >io_link_props); break; @@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; - to_dev->io_link_count++; to_dev->node_props.io_links_count++; list_add_tail(>list, _dev->io_link_props); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3e240b22ec91..304322ac39e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int proximity_domain) */ if (iolink->node_to == proximity_domain) { list_del(>list); - dev->io_link_count--; dev->node_props.io_links_count--; } else { if (iolink->node_from > proximity_domain) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2fb5664e1041..9f6c949186c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -130,7 +130,6 @@ struct kfd_topology_device { struct list_headmem_props; uint32_tcache_count; struct list_headcache_props; - uint32_tio_link_count; struct list_headio_link_props; struct list_headp2p_link_props; struct list_headperf_props; -- 
2.35.1
[PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero
In existing code MMIO and DOORBELL BOs are unpinned without ensuring the condition that their map count has reached zero. Unpinning without checking this constraint could lead to an error while BO is being freed. The patch fixes this issue. Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a1de900ba677..e5dc94b745b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(>lock); - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ - if (mem->alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | -KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); - } - mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(>lock); @@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Make sure restore workers don't access the BO any more */ bo_list_entry = >validate_list; mutex_lock(_info->lock); - list_del(_list_entry->head); + list_del_init(_list_entry->head); mutex_unlock(_info->lock); /* No more MMU notifiers */ @@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ret = unreserve_bo_and_vms(, false, false); + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | +KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + /* Free the sync object */ amdgpu_sync_free(>sync); -- 2.35.1
RE: [PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device
[AMD Official Use Only - General] Ignore the patch sent a bit early. Regards, Ramesh -Original Message- From: Errabolu, Ramesh Sent: Wednesday, June 8, 2022 5:07 PM To: amd-gfx@lists.freedesktop.org Cc: Errabolu, Ramesh Subject: [PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device The field is redundant and does not serve any functional role Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 - 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cbfb32b3d235..a5409531a2fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props->rec_transfer_size = iolink->recommended_transfer_size; - dev->io_link_count++; dev->node_props.io_links_count++; list_add_tail(>list, >io_link_props); break; @@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; - to_dev->io_link_count++; to_dev->node_props.io_links_count++; list_add_tail(>list, _dev->io_link_props); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3e240b22ec91..304322ac39e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int proximity_domain) */ if (iolink->node_to == proximity_domain) { list_del(>list); - dev->io_link_count--; dev->node_props.io_links_count--; } else { if (iolink->node_from > proximity_domain) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2fb5664e1041..9f6c949186c1 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -130,7 +130,6 @@ struct kfd_topology_device { struct list_headmem_props; uint32_tcache_count; struct list_headcache_props; - uint32_tio_link_count; struct list_headio_link_props; struct list_headp2p_link_props; struct list_headperf_props; -- 2.35.1
[PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device
The field is redundant and does not serve any functional role Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 - 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cbfb32b3d235..a5409531a2fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props->rec_transfer_size = iolink->recommended_transfer_size; - dev->io_link_count++; dev->node_props.io_links_count++; list_add_tail(>list, >io_link_props); break; @@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; - to_dev->io_link_count++; to_dev->node_props.io_links_count++; list_add_tail(>list, _dev->io_link_props); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3e240b22ec91..304322ac39e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int proximity_domain) */ if (iolink->node_to == proximity_domain) { list_del(>list); - dev->io_link_count--; dev->node_props.io_links_count--; } else { if (iolink->node_from > proximity_domain) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2fb5664e1041..9f6c949186c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -130,7 +130,6 @@ struct kfd_topology_device { struct list_headmem_props; uint32_tcache_count; struct list_headcache_props; - uint32_tio_link_count; struct list_headio_link_props; struct list_headp2p_link_props; struct list_headperf_props; -- 
2.35.1
Re: [PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for smu_v13_0_7
[AMD Official Use Only - General] Series is Reviewed-by: Yang Wang Best Regards, Kevin From: amd-gfx on behalf of Kenneth Feng Sent: Wednesday, June 8, 2022 5:05 PM To: amd-gfx@lists.freedesktop.org Cc: Feng, Kenneth Subject: [PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for smu_v13_0_7 add interface to deallocate power_context for smu_v13_0_7 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index bdea7bca3805..7da42cae5d6e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1541,6 +1541,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .load_microcode = smu_v13_0_load_microcode, .init_smc_tables = smu_v13_0_7_init_smc_tables, .init_power = smu_v13_0_init_power, + .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_7_check_fw_status, .setup_pptable = smu_v13_0_7_setup_pptable, .check_fw_version = smu_v13_0_check_fw_version, -- 2.25.1
答复: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7
[AMD Official Use Only - General] [AMD Official Use Only - General] Yeah, synced to the latest code. Somehow ‘drm/amd/pm: drop redundant declarations’was reverted somehow at a point. Thanks. Best wishes Kenneth Feng 发件人: Lazar, Lijo 日期: 星期三, 2022年6月8日 17:14 收件人: Feng, Kenneth , amd-gfx@lists.freedesktop.org 主题: Re: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7 On 6/8/2022 2:35 PM, Kenneth Feng wrote: > enable BACO on smu_v13_0_7 > > Signed-off-by: Kenneth Feng > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 + > drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++ > 2 files changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index fb04d82f66e6..f57710790b8c 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu) >case IP_VERSION(11, 0, 0): >case IP_VERSION(11, 0, 5): >case IP_VERSION(11, 0, 9): > + case IP_VERSION(13, 0, 7): >return 0; >default: >break; > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > index 7da42cae5d6e..dc614befcdf5 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > @@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping > smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = >MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, > 0), >MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc, > 0), >MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, > 0), > + MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, > 0), >MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, > 1), >MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, > 1), >MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, > 1), > @@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context > *smu, >*(uint64_t *)feature_mask |= 
FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT); >*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT); >*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); > + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); > >if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK) >*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT); > @@ -1584,6 +1586,11 @@ static const struct pptable_funcs > smu_v13_0_7_ppt_funcs = { >.set_tool_table_location = smu_v13_0_set_tool_table_location, >.get_pp_feature_mask = smu_cmn_get_pp_feature_mask, >.set_pp_feature_mask = smu_cmn_set_pp_feature_mask, > + .baco_is_support = smu_v13_0_baco_is_support, > + .baco_get_state = smu_v13_0_baco_get_state, > + .baco_set_state = smu_v13_0_baco_set_state, > + .baco_enter = smu_v13_0_baco_enter, > + .baco_exit = smu_v13_0_baco_exit, I remember seeing this one - "drm/amd/pm: drop redundant declarations" which drops smu13 baco common functions. Is this in sync with the latest source? Thanks, Lijo > }; > > void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) >
Re: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7
On 6/8/2022 2:35 PM, Kenneth Feng wrote: enable BACO on smu_v13_0_7 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb04d82f66e6..f57710790b8c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): + case IP_VERSION(13, 0, 7): return 0; default: break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7da42cae5d6e..dc614befcdf5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 0), MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc, 0), MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 0), + MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 0), MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 1), MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 1), @@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT); @@ -1584,6 +1586,11 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_tool_table_location = 
smu_v13_0_set_tool_table_location, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, + .baco_is_support = smu_v13_0_baco_is_support, + .baco_get_state = smu_v13_0_baco_get_state, + .baco_set_state = smu_v13_0_baco_set_state, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, I remember seeing this one - "drm/amd/pm: drop redundant declarations" which drops smu13 baco common functions. Is this in sync with the latest source? Thanks, Lijo }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
[PATCH 5/5] drm/amd/pm: support BAMACO reset on smu_v13_0_7
support BAMACO reset on smu_v13_0_7, take BAMACO as a subset of BACO for the low latency, and it only happens on specific platforms. Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 57 ++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 6d51e4340aad..b81c657c7386 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -432,6 +432,7 @@ struct smu_baco_context { uint32_t state; bool platform_support; + bool maco_support; }; struct smu_freq_info { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index b635c2b4f81c..369e84fef5a6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -303,6 +303,8 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) struct smu_13_0_7_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + PPTable_t *smc_pptable = table_context->driver_pptable; + BoardTable_t *BoardTable = &smc_pptable->BoardTable; if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; @@ -311,6 +313,9 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) smu_baco->platform_support = true; + if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) + smu_baco->maco_support = true; + table_context->thermal_controller_type = powerplay_table->thermal_controller_type; @@ -1537,6 +1542,54 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp return ret; } +static int smu_v13_0_7_baco_set_state(struct 
smu_context *smu, +enum smu_baco_state state) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + bool is_maco_support = smu_baco->maco_support; + int ret = 0; + + if (smu_v13_0_baco_get_state(smu) == state) + return 0; + + if (state == SMU_BACO_STATE_ENTER) { + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_EnterBaco, + (is_maco_support ? 2 : 0), + NULL); + } else { + ret = smu_cmn_send_smc_msg(smu, + SMU_MSG_ExitBaco, + NULL); + if (ret) + return ret; + + /* clear vbios scratch 6 and 7 for coming asic reinit */ + WREG32(adev->bios_scratch_reg_offset + 6, 0); + WREG32(adev->bios_scratch_reg_offset + 7, 0); + } + + if (!ret) + smu_baco->state = state; + + return ret; +} + +static int smu_v13_0_7_baco_enter(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_v13_0_7_baco_set_state(smu, + SMU_BACO_STATE_ENTER); + if (ret) + return ret; + + msleep(10); + + return ret; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -1591,8 +1644,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, - .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_baco_enter, + .baco_set_state = smu_v13_0_7_baco_set_state, + .baco_enter = smu_v13_0_7_baco_enter, .baco_exit = smu_v13_0_baco_exit, }; -- 2.25.1
[PATCH 4/5] drm/amd/pm: enable gfxoff on smu_v13_0_7
enable gfxoff on smu_v13_0_7 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index dc614befcdf5..b635c2b4f81c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -249,6 +249,9 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT); } + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFXOFF_BIT); + if (adev->pm.pp_feature & PP_MCLK_DPM_MASK) { *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_FCLK_BIT); -- 2.25.1
[PATCH 3/5] drm/amd/pm: update the driver if header for smu_v13_0_7
update the driver if header for smu_v13_0_7 Signed-off-by: Kenneth Feng --- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 62 +-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index d99b4b47d49d..132da684e379 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x28 +#define SMU13_DRIVER_IF_VERSION 0x2A //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x1D +#define PPTABLE_VERSION 0x1E #define NUM_GFXCLK_DPM_LEVELS16 #define NUM_SOCCLK_DPM_LEVELS8 @@ -112,6 +112,22 @@ #define FEATURE_SPARE_63_BIT 63 #define NUM_FEATURES 64 +#define ALLOWED_FEATURE_CTRL_DEFAULT 0xULL +#define ALLOWED_FEATURE_CTRL_SCPM(1 << FEATURE_DPM_GFXCLK_BIT) | \ + (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \ + (1 << FEATURE_DPM_UCLK_BIT) | \ + (1 << FEATURE_DPM_FCLK_BIT) | \ + (1 << FEATURE_DPM_SOCCLK_BIT) | \ + (1 << FEATURE_DPM_MP0CLK_BIT) | \ + (1 << FEATURE_DPM_LINK_BIT) | \ + (1 << FEATURE_DPM_DCN_BIT) | \ + (1 << FEATURE_DS_GFXCLK_BIT) | \ + (1 << FEATURE_DS_SOCCLK_BIT) | \ + (1 << FEATURE_DS_FCLK_BIT) | \ + (1 << FEATURE_DS_LCLK_BIT) | \ + (1 << FEATURE_DS_DCFCLK_BIT) | \ + (1 << FEATURE_DS_UCLK_BIT) + //For use with feature control messages typedef enum { FEATURE_PWR_ALL, @@ -662,7 +678,7 @@ typedef struct { #define PP_NUM_OD_VF_CURVE_POINTS PP_NUM_RTAVFS_PWL_ZONES + 1 -#define PP_OD_FEATURE_VF_CURVE_BIT 0 +#define PP_OD_FEATURE_GFX_VF_CURVE_BIT 0 #define PP_OD_FEATURE_VMAX_BIT 1 #define PP_OD_FEATURE_PPT_BIT 2 #define PP_OD_FEATURE_FAN_CURVE_BIT 3 @@ -671,6 +687,8 @@ typedef struct { #define 
PP_OD_FEATURE_TDC_BIT 6 #define PP_OD_FEATURE_GFXCLK_BIT 7 #define PP_OD_FEATURE_UCLK_BIT 8 +#define PP_OD_FEATURE_ZERO_FAN_BIT 9 +#define PP_OD_FEATURE_TEMPERATURE_BIT 10 typedef enum { PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -689,8 +707,8 @@ typedef struct { uint8_tRuntimePwrSavingFeaturesCtrl; //Frequency changes - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -701,17 +719,17 @@ typedef struct { //Fan control uint8_tFanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS]; uint8_tFanLinearTempPoints[NUM_OD_FAN_MAX_POINTS]; - uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_tFanZeroRpmEnable; uint8_tFanZeroRpmStopTemp; uint8_tFanMode; - uint8_tPadding[1]; - + uint8_tMaxOpTemp; + uint8_tPadding[4]; - uint32_t Spare[13]; + uint32_t Spare[12]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -740,17 +758,17 @@ typedef struct { uint8_tFanLinearPwmPoints; uint8_tFanLinearTempPoints; - uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_tFanZeroRpmEnable; uint8_tFanZeroRpmStopTemp; uint8_tFanMode; - uint8_tPadding[1]; + uint8_tMaxOpTemp; + uint8_tPadding[4]; - - uint32_t
[PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7
enable BACO on smu_v13_0_7 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb04d82f66e6..f57710790b8c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): + case IP_VERSION(13, 0, 7): return 0; default: break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7da42cae5d6e..dc614befcdf5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 0), MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc, 0), MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 0), + MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 0), MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 1), MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 1), @@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT); @@ -1584,6 +1586,11 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_tool_table_location = smu_v13_0_set_tool_table_location, 
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, + .baco_is_support = smu_v13_0_baco_is_support, + .baco_get_state = smu_v13_0_baco_get_state, + .baco_set_state = smu_v13_0_baco_set_state, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) -- 2.25.1
[PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for smu_v13_0_7
add interface to deallocate power_context for smu_v13_0_7 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index bdea7bca3805..7da42cae5d6e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1541,6 +1541,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .load_microcode = smu_v13_0_load_microcode, .init_smc_tables = smu_v13_0_7_init_smc_tables, .init_power = smu_v13_0_init_power, + .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_7_check_fw_status, .setup_pptable = smu_v13_0_7_setup_pptable, .check_fw_version = smu_v13_0_check_fw_version, -- 2.25.1
Re: [PATCH 1/1] drm/radeon: Initialize fences array entries in radeon_sa_bo_next_hole
Am 07.06.22 um 17:19 schrieb Xiaohui Zhang: Similar to the handling of amdgpu_sa_bo_next_hole in commit 6a15f3ff19a8 ("drm/amdgpu: Initialize fences array entries in amdgpu_sa_bo_next_hole"), we thought a patch might be needed here as well. The entries were only initialized once in radeon_sa_bo_new. If a fence wasn't signalled yet in the first radeon_sa_bo_next_hole call, but then got signalled before a later radeon_sa_bo_next_hole call, it could destroy the fence but leave its pointer in the array, resulting in use-after-free in radeon_sa_bo_new. I would rather like to see the sub allocator moved into a common drm helper. Regards, Christian. Signed-off-by: Xiaohui Zhang --- drivers/gpu/drm/radeon/radeon_sa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 310c322c7112..0981948bd9ed 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -267,6 +267,8 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, for (i = 0; i < RADEON_NUM_RINGS; ++i) { struct radeon_sa_bo *sa_bo; + fences[i] = NULL; + if (list_empty(&sa_manager->flist[i])) { continue; } @@ -332,10 +334,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev, spin_lock(&sa_manager->wq.lock); do { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - fences[i] = NULL; + for (i = 0; i < RADEON_NUM_RINGS; ++i) tries[i] = 0; - } do { radeon_sa_bo_try_free(sa_manager);
Re: [PATCH 1/1] drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit
On 6/8/2022 11:06 AM, Mohammad Zafar Ziya wrote: Add jpeg vmid update under IB submit Signed-off-by: Mohammad Zafar Ziya Acked-by: Christian König Reviewed-by: Lijo Lazar Thanks, Lijo --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index d2722adabd1b..f3c1af5130ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, { unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT)); + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, (vmid | (vmid << 4))); @@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ 8 + 16, - .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */ .emit_ib = jpeg_v2_0_dec_ring_emit_ib, .emit_fence = jpeg_v2_0_dec_ring_emit_fence, .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 1a03baa59755..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -41,6 +41,7 @@ #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR0x18000
Re: [PATCH v5 02/13] mm: handling Non-LRU pages returned by vm_normal_pages
I can't see any issues with this now so: Reviewed-by: Alistair Popple Alex Sierra writes: > With DEVICE_COHERENT, we'll soon have vm_normal_pages() return > device-managed anonymous pages that are not LRU pages. Although they > behave like normal pages for purposes of mapping in CPU page, and for > COW. They do not support LRU lists, NUMA migration or THP. > > We also introduced a FOLL_LRU flag that adds the same behaviour to > follow_page and related APIs, to allow callers to specify that they > expect to put pages on an LRU list. > > Signed-off-by: Alex Sierra > Acked-by: Felix Kuehling > --- > fs/proc/task_mmu.c | 2 +- > include/linux/mm.h | 3 ++- > mm/gup.c | 6 +- > mm/huge_memory.c | 2 +- > mm/khugepaged.c| 9 ++--- > mm/ksm.c | 6 +++--- > mm/madvise.c | 4 ++-- > mm/memory.c| 9 - > mm/mempolicy.c | 2 +- > mm/migrate.c | 4 ++-- > mm/mlock.c | 2 +- > mm/mprotect.c | 2 +- > 12 files changed, 33 insertions(+), 18 deletions(-) > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > index 2d04e3470d4c..2dd8c8a66924 100644 > --- a/fs/proc/task_mmu.c > +++ b/fs/proc/task_mmu.c > @@ -1792,7 +1792,7 @@ static struct page *can_gather_numa_stats(pte_t pte, > struct vm_area_struct *vma, > return NULL; > > page = vm_normal_page(vma, addr, pte); > - if (!page) > + if (!page || is_zone_device_page(page)) > return NULL; > > if (PageReserved(page)) > diff --git a/include/linux/mm.h b/include/linux/mm.h > index bc8f326be0ce..d3f43908ff8d 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -601,7 +601,7 @@ struct vm_operations_struct { > #endif > /* >* Called by vm_normal_page() for special PTEs to find the > - * page for @addr. This is useful if the default behavior > + * page for @addr. This is useful if the default behavior >* (using pte_page()) would not find the correct page. 
>*/ > struct page *(*find_special_page)(struct vm_area_struct *vma, > @@ -2934,6 +2934,7 @@ struct page *follow_page(struct vm_area_struct *vma, > unsigned long address, > #define FOLL_NUMA0x200 /* force NUMA hinting page fault */ > #define FOLL_MIGRATION 0x400 /* wait for page to replace migration > entry */ > #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ > +#define FOLL_LRU0x1000 /* return only LRU (anon or page cache) */ > #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ > #define FOLL_COW 0x4000 /* internal GUP flag */ > #define FOLL_ANON0x8000 /* don't do file mappings */ > diff --git a/mm/gup.c b/mm/gup.c > index 551264407624..48b45bcc8501 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -532,7 +532,11 @@ static struct page *follow_page_pte(struct > vm_area_struct *vma, > } > > page = vm_normal_page(vma, address, pte); > - if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { > + if ((flags & FOLL_LRU) && ((page && is_zone_device_page(page)) || > + (!page && pte_devmap(pte { > + page = ERR_PTR(-EEXIST); > + goto out; > + } else if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) > { > /* >* Only return device mapping pages in the FOLL_GET or FOLL_PIN >* case since they are only valid while holding the pgmap > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index a77c78a2b6b5..48182c8fe151 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2906,7 +2906,7 @@ static int split_huge_pages_pid(int pid, unsigned long > vaddr_start, > } > > /* FOLL_DUMP to ignore special (like zero) pages */ > - page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); > + page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP | FOLL_LRU); > > if (IS_ERR(page)) > continue; > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 16be62d493cd..671ac7800e53 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -618,7 +618,7 @@ static int __collapse_huge_page_isolate(struct > vm_area_struct *vma, > goto out; > 
} > page = vm_normal_page(vma, address, pteval); > - if (unlikely(!page)) { > + if (unlikely(!page) || unlikely(is_zone_device_page(page))) { > result = SCAN_PAGE_NULL; > goto out; > } > @@ -1267,7 +1267,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, > writable = true; > > page = vm_normal_page(vma, _address, pteval); > - if (unlikely(!page)) { > + if (unlikely(!page) || unlikely(is_zone_device_page(page))) { > result = SCAN_PAGE_NULL; > goto out_unmap; > } > @@ -1479,7