[PATCH] drm/amd/display: Fix up kdoc for 'optc35_set_odm_combine'
Fixes the following W=1 kernel build warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn35/dcn35_optc.c:46: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Enable CRTC Cc: Qingqing Zhuo Cc: Rodrigo Siqueira Cc: Harry Wentland Cc: Aurabindo Pillai Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c index 5f7adc83258b..294799d8c34e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c @@ -43,10 +43,15 @@ optc1->tg_shift->field_name, optc1->tg_mask->field_name /** - * Enable CRTC - * Enable CRTC - call ASIC Control Object to enable Timing generator. + * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable Timing generator. + * + * @optc: timing_generator instance. + * @opp_id: OPP instance. + * @opp_cnt: OPP count. + * @timing: Timing parameters used to configure DCN blocks. + * + * Return: void. */ - static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing) { -- 2.25.1
Re: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11
Acked-by: Alex Deucher On Fri, Aug 25, 2023 at 4:10 PM David Francis wrote: > > The code in kfd_mqd_manager_v11.c to support criu dump and > restore of queue state was missing. > > Added it; should be equivalent to kfd_mqd_manager_v10.c. > > CC: Felix Kuehling > Signed-off-by: David Francis > --- > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 41 +++ > 1 file changed, 41 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > index 2319467d2d95..2a79d37da95d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c > @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void > *mqd, > return 0; > } > > +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, > void *ctl_stack_dst) > +{ > + struct v11_compute_mqd *m; > + > + m = get_mqd(mqd); > + > + memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd)); > +} > + > +static void restore_mqd(struct mqd_manager *mm, void **mqd, > + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, > + struct queue_properties *qp, > + const void *mqd_src, > + const void *ctl_stack_src, const u32 ctl_stack_size) > +{ > + uint64_t addr; > + struct v11_compute_mqd *m; > + > + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; > + addr = mqd_mem_obj->gpu_addr; > + > + memcpy(m, mqd_src, sizeof(*m)); > + > + *mqd = m; > + if (gart_addr) > + *gart_addr = addr; > + > + m->cp_hqd_pq_doorbell_control = > + qp->doorbell_off << > + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; > + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", > + m->cp_hqd_pq_doorbell_control); > + > + qp->is_active = 0; > +} > + > + > static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, > struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, > struct queue_properties *q) > @@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum > KFD_MQD_TYPE type, > mqd->is_occupied = 
kfd_is_occupied_cp; > mqd->mqd_size = sizeof(struct v11_compute_mqd); > mqd->get_wave_state = get_wave_state; > + mqd->checkpoint_mqd = checkpoint_mqd; > + mqd->restore_mqd = restore_mqd; > #if defined(CONFIG_DEBUG_FS) > mqd->debugfs_show_mqd = debugfs_show_mqd; > #endif > @@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum > KFD_MQD_TYPE type, > mqd->update_mqd = update_mqd_sdma; > mqd->destroy_mqd = kfd_destroy_mqd_sdma; > mqd->is_occupied = kfd_is_occupied_sdma; > + mqd->checkpoint_mqd = checkpoint_mqd; > + mqd->restore_mqd = restore_mqd; > mqd->mqd_size = sizeof(struct v11_sdma_mqd); > #if defined(CONFIG_DEBUG_FS) > mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; > -- > 2.34.1 >
Re: [PATCH] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV
Acked-by: Alex Deucher On Mon, Aug 28, 2023 at 2:50 AM ZhenGuo Yin wrote: > > Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can > directly access it through MMIO. > > Signed-off-by: ZhenGuo Yin > --- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 10 ++ > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++ > 2 files changed, 4 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 0aee9c8288a2..65619f73f717 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -7901,18 +7901,12 @@ static void gfx_v10_0_update_spm_vmid_internal(struct > amdgpu_device *adev, > > /* not for *_SOC15 */ > reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); > - if (amdgpu_sriov_is_pp_one_vf(adev)) > - data = RREG32_NO_KIQ(reg); > - else > - data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); > + data = RREG32_NO_KIQ(reg); > > data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; > data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << > RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; > > - if (amdgpu_sriov_is_pp_one_vf(adev)) > - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); > - else > - WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); > + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); > } > > static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned > int vmid) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > index b0c32521efdc..7f8c5c6fd36e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > @@ -4989,18 +4989,12 @@ static void gfx_v11_0_update_spm_vmid(struct > amdgpu_device *adev, unsigned vmid) > amdgpu_gfx_off_ctrl(adev, false); > > reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); > - if (amdgpu_sriov_is_pp_one_vf(adev)) > - data = RREG32_NO_KIQ(reg); > - else > - data = RREG32(reg); > + data = RREG32_NO_KIQ(reg); > > data &= 
~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; > data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << > RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; > > - if (amdgpu_sriov_is_pp_one_vf(adev)) > - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); > - else > - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); > + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); > > amdgpu_gfx_off_ctrl(adev, true); > } > -- > 2.35.1 >
Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults
On 2023-08-28 16:57, Chen, Xiaogang wrote: On 8/28/2023 2:06 PM, Felix Kuehling wrote: On 2023-08-24 18:08, Xiaogang.Chen wrote: From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. Now a svm range may have pages from both system ram and vram of one gpu. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 87 - drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 +- 4 files changed, 162 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7d82c7da223a..5a3aa80a1834 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", - prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("request svms 0x%p 
[0x%lx 0x%lx] migrate to sys ram\n", + prange->svms, start_mgr, last_mgr); return 0; } @@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); - } else { + /* only free dma mapping in the migrated range */ + svm_range_free_dma_mappings(prange, true, start_mgr - prange->start, + last_mgr - start_mgr + 1); This is wrong. If we only migrated some of the pages, we should not free the DMA mapping array at all. The array is needed as long as there are any valid DMA mappings in it. yes, I realized it after submit. I can not free DMA mapping array at this stage. The concern(also related to comments below) is I do not know how many pages in vram after partial migration. Originally I used bitmap to record that. I used bitmap to record which pages were migrated at each migration functions. Here I do not need use hmm function to get that info, inside each migration function we can know which pages got migrated, then update the bitmap accordingly inside each migration function. I think the condition above with cpages should be updated. Instead of cpages, we need to keep track of a count of pages in VRAM in struct svm_range. See more below. I think you want add a new integer in svm_range to remember how many pages are in vram side for each svm_range, instead of bitmap. There is a problem I saw: when we need split a prange(such as user uses set_attr api) how do we know how many pages in vram for each splitted prange? Right, that's a bit problematic. 
But it should be a relatively rare corner case. It may be good enough to make a "pessimistic" assumption when splitting ranges that have some pages in VRAM, that everything is in VRAM. And update that to 0 after migrate_to_ram for the entire range, to allow the BO reference to be released. So in the worst case, you keep your DMA addresses and BOs allocated slightly longer than necessary. If that doesn't work, I agree that we need a bitmap with one bit per 4KB page. But I hope that can be avoided. That said, I'm not actually sure why we're freeing the DMA address array after migration to RAM at all. I think we still need it even
Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults
On 8/28/2023 2:06 PM, Felix Kuehling wrote: On 2023-08-24 18:08, Xiaogang.Chen wrote: From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. Now a svm range may have pages from both system ram and vram of one gpu. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 87 - drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 +- 4 files changed, 162 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7d82c7da223a..5a3aa80a1834 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", - prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", + 
prange->svms, start_mgr, last_mgr); return 0; } @@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); - } else { + /* only free dma mapping in the migrated range */ + svm_range_free_dma_mappings(prange, true, start_mgr - prange->start, + last_mgr - start_mgr + 1); This is wrong. If we only migrated some of the pages, we should not free the DMA mapping array at all. The array is needed as long as there are any valid DMA mappings in it. yes, I realized it after submit. I can not free DMA mapping array at this stage. The concern(also related to comments below) is I do not know how many pages in vram after partial migration. Originally I used bitmap to record that. I used bitmap to record which pages were migrated at each migration functions. Here I do not need use hmm function to get that info, inside each migration function we can know which pages got migrated, then update the bitmap accordingly inside each migration function. I think the condition above with cpages should be updated. Instead of cpages, we need to keep track of a count of pages in VRAM in struct svm_range. See more below. I think you want add a new integer in svm_range to remember how many pages are in vram side for each svm_range, instead of bitmap. There is a problem I saw: when we need split a prange(such as user uses set_attr api) how do we know how many pages in vram for each splitted prange? 
+ } else if (!prange->actual_loc) + /* if all pages from prange are at sys ram */ svm_range_vram_node_free(prange); - } return r < 0 ? r : 0; } @@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @start_mgr: start page need be migrated to sys ram + * @last_mgr: last page need be migrated to sys ram * @trigger: reason of migration * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback * @@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node,
Re: [PATCH v3 0/7] GPU workload hints for better performance
On 28/08/2023 17:14, Yadav, Arvind wrote: On 8/28/2023 9:13 PM, Helen Mae Koike Fornazier wrote: On Monday, August 28, 2023 09:26 -03, Arvind Yadav wrote: AMDGPU SOCs supports dynamic workload based power profiles, which can provide fine-tuned performance for a particular type of workload. This patch series adds an interface to set/reset these power profiles based on the submitted job. The driver can dynamically switch the power profiles based on submitted job. This can optimize the power performance when the particular workload is on. Hi Arvind, Would you mind to test your patchset with drm-ci ? There is a amdgpu test there and I would love to get your feedback of the ci. You basically just need to apply the ci patch which is available on https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci There are instructions in the docs, but in short: to configure it, you push your branch to gitlab.freedesktop.org, go to the settings and change the CI/CD configuration file from .gitlab-ci.yml to drivers/gpu/drm/ci/gitlab-ci.yml, and you can trigger a pipeline on your branch to get tests running. (by the time of this writing, gitlab.fdo is under maintenance but should be up soonish). Hi Helen, I tried the steps as mentioned by you but looks like something is missing and build itself is failing. https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commits/smu_workload Thanks for your feedback! You need to apply this patch https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commit/cc6dcff192d07f9fe82645fbc4213c97e872156b This patch adds the file drivers/gpu/drm/ci/gitlab-ci.yml for you. And you can drop the patch where gitlab added the ci template. I replied here too https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commit/cc6dcff192d07f9fe82645fbc4213c97e872156b Could you try again with that patch? Thanks a lot! Helen Regards, ~Arvind Thank you! Helen v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. 
- Added new suspend function. - Added patch to switches the GPU workload mode for KFD. v3: - Addressed all review comment. - Changed the function name from *_set() to *_get(). - Now clearing all the profile in work handler. - Added *_clear_all function to clear all the power profile. Arvind Yadav (7): drm/amdgpu: Added init/fini functions for workload drm/amdgpu: Add new function to set GPU power profile drm/amdgpu: Add new function to put GPU power profile drm/amdgpu: Add suspend function to clear the GPU power profile. drm/amdgpu: Set/Reset GPU workload profile drm/amdgpu: switch workload context to/from compute Revert "drm/amd/amdgpu: switch on/off vcn power profile mode" drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 226 ++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 61 + 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h -- 2.34.1
Re: [PATCH v3 0/7] GPU workload hints for better performance
On 8/28/2023 9:13 PM, Helen Mae Koike Fornazier wrote: On Monday, August 28, 2023 09:26 -03, Arvind Yadav wrote: AMDGPU SOCs supports dynamic workload based power profiles, which can provide fine-tuned performance for a particular type of workload. This patch series adds an interface to set/reset these power profiles based on the submitted job. The driver can dynamically switch the power profiles based on submitted job. This can optimize the power performance when the particular workload is on. Hi Arvind, Would you mind to test your patchset with drm-ci ? There is a amdgpu test there and I would love to get your feedback of the ci. You basically just need to apply the ci patch which is available on https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci There are instructions in the docs, but in short: to configure it, you push your branch to gitlab.freedesktop.org, go to the settings and change the CI/CD configuration file from .gitlab-ci.yml to drivers/gpu/drm/ci/gitlab-ci.yml, and you can trigger a pipeline on your branch to get tests running. (by the time of this writing, gitlab.fdo is under maintenance but should be up soonish). Hi Helen, I tried the steps as mentioned by you but looks like something is missing and build itself is failing. https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commits/smu_workload Regards, ~Arvind Thank you! Helen v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. - Added new suspend function. - Added patch to switches the GPU workload mode for KFD. v3: - Addressed all review comment. - Changed the function name from *_set() to *_get(). - Now clearing all the profile in work handler. - Added *_clear_all function to clear all the power profile. 
Arvind Yadav (7): drm/amdgpu: Added init/fini functions for workload drm/amdgpu: Add new function to set GPU power profile drm/amdgpu: Add new function to put GPU power profile drm/amdgpu: Add suspend function to clear the GPU power profile. drm/amdgpu: Set/Reset GPU workload profile drm/amdgpu: switch workload context to/from compute Revert "drm/amd/amdgpu: switch on/off vcn power profile mode" drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 226 ++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 61 + 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h -- 2.34.1
Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults
On 2023-08-24 18:08, Xiaogang.Chen wrote: From: Xiaogang Chen This patch implements partial migration in gpu page fault according to migration granularity(default 2MB) and not split svm range in cpu page fault handling. Now a svm range may have pages from both system ram and vram of one gpu. These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: xiaogang chen --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 87 - drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 +- 4 files changed, 162 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7d82c7da223a..5a3aa80a1834 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -prange->svms, prange->start, prange->last, best_loc); + if (!best_loc) { + pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n", +prange->svms, start_mgr, last_mgr); return 0; } @@ 
-513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { @@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_free_dma_mappings(prange, true); - } else { + /* only free dma mapping in the migrated range */ + svm_range_free_dma_mappings(prange, true, start_mgr - prange->start, +last_mgr - start_mgr + 1); This is wrong. If we only migrated some of the pages, we should not free the DMA mapping array at all. The array is needed as long as there are any valid DMA mappings in it. I think the condition above with cpages should be updated. Instead of cpages, we need to keep track of a count of pages in VRAM in struct svm_range. See more below. + } else if (!prange->actual_loc) + /* if all pages from prange are at sys ram */ svm_range_vram_node_free(prange); - } return r < 0 ? 
r : 0; } @@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @start_mgr: start page need be migrated to sys ram + * @last_mgr: last page need be migrated to sys ram * @trigger: reason of migration * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback * @@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, * 0 - OK, otherwise error code */ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, - uint32_t trigger, struct page *fault_page) + unsigned long start_mgr, unsigned long last_mgr, + uint32_t trigger, struct page *fault_page) { struct kfd_node *node; struct vm_area_struct *vma; @@ -781,23 +789,30 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, unsigned long upages = 0; long r = 0; + /* this prange has no vram pages to migrate to sys ram */ if
Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs
[AMD Official Use Only - General] Technically the AMD IOMMU uses direct mapping mode for any device which claims to support ATS in order to support the IOMMUv2 functionality, but that was also the case with Raven systems which were problematic when remapping mode was enabled. That said, now that IOMMUv2 support has been removed, there's no reason to use direct mapping in the IOMMU driver so that may change. Alex From: Koenig, Christian Sent: Monday, August 28, 2023 7:30 AM To: Zhang, Yifan ; Christian König ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs Well, there seems to be a very basic misunderstanding here: The IOMMU isolation level is *not* ASIC dependent! Try to set amd_iommu=force_isolation on the kernel command line. This is a configuration option customers can use to harden their systems and when this isn't properly tested we can't allow page tables in system memory. Regards, Christian. Am 28.08.23 um 13:23 schrieb Zhang, Yifan: > [Public] > > Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU > is pass through in these ASIC. > > -Original Message- > From: Christian König > Sent: Monday, August 28, 2023 5:41 PM > To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Koenig, Christian > > Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for > gfx10 onwards APUs > > Is that now validated with IOMMU in non pass through mode? > > Christian. 
> > Am 28.08.23 um 10:58 schrieb Zhang, Yifan: >> [AMD Official Use Only - General] >> >> Ping >> >> -Original Message- >> From: Zhang, Yifan >> Sent: Friday, August 25, 2023 8:34 AM >> To: amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander ; Koenig, Christian >> ; Zhang, Yifan >> Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 >> onwards APUs >> >> To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 >> and newer APUs. >> >> v2: only enable it for gfx10 and newer APUs (Alex, Christian) >> >> Signed-off-by: Yifan Zhang >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++--- >>1 file changed, 6 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c >> index 96d601e209b8..4603d87c61a0 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c >> @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, >> struct amdgpu_vm *vm, >> bp.size = amdgpu_vm_pt_size(adev, level); >> bp.byte_align = AMDGPU_GPU_PAGE_SIZE; >> >> - if (!adev->gmc.is_app_apu) >> - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; >> - else >> + if (adev->gmc.is_app_apu || >> + ((adev->flags & AMD_IS_APU) && >> + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3 >> bp.domain = AMDGPU_GEM_DOMAIN_GTT; >> + else >> + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; >> + >> >> bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); >> bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | >> -- >> 2.37.3 >>
Re: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks
On 2023-08-25 17:30, Harish Kasiviswanathan wrote: From: Jay Cornwall mqd_stride function was introduced in commit 129c7b6a0217 ("drm/amdkfd: Update MQD management on multi XCC setup") but not assigned for gfx11. Fixes a NULL dereference in debugfs. Signed-off-by: Jay Cornwall Signed-off-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 2319467d2d95..0bbf0edbabd4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_hiq_mqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; mqd->mqd_size = sizeof(struct v11_sdma_mqd); + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif
Re: [PATCH] drm/amdkfd: use mask to get v9 interrupt sq data bits correctly
On 2023-08-28 11:35, Alex Sierra wrote: Interrupt sq data bits were not taken properly from contextid0 and contextid1. Use macro KFD_CONTEXT_ID_GET_SQ_INT_DATA instead. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index f0731a6a5306..830396b1c3b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, default: break; } - kfd_signal_event_interrupt(pasid, context_id0 & 0xff, 24); + kfd_signal_event_interrupt(pasid, sq_int_data, 24); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0),
Re: [PATCH v2] drm/amdkfd: Replace pr_err with dev_err
On 2023-08-26 09:41, Asad Kamal wrote: Replace pr_err with dev_err to show the bus-id of failing device with kfd queue errors Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Felix Kuehling --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 116 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 2 files changed, 71 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b166f30f083e..cd6cfffd6436 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -232,8 +232,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_type = convert_to_mes_queue_type(q->properties.type); if (queue_type < 0) { - pr_err("Queue type not supported with MES, queue:%d\n", - q->properties.type); + dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n", + q->properties.type); return -EINVAL; } queue_input.queue_type = (uint32_t)queue_type; @@ -244,9 +244,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, r = adev->mes.funcs->add_hw_queue(>mes, _input); amdgpu_mes_unlock(>mes); if (r) { - pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", + dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n", q->properties.doorbell_off); - pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); kfd_hws_hang(dqm); } @@ -272,9 +272,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, amdgpu_mes_unlock(>mes); if (r) { - pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", + dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n", q->properties.doorbell_off); - pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); + dev_err(adev->dev, "MES 
might be in unrecoverable state, issue a GPU reset\n"); kfd_hws_hang(dqm); } @@ -284,6 +284,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, static int remove_all_queues_mes(struct device_queue_manager *dqm) { struct device_process_node *cur; + struct device *dev = dqm->dev->adev->dev; struct qcm_process_device *qpd; struct queue *q; int retval = 0; @@ -294,7 +295,7 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) if (q->properties.is_active) { retval = remove_queue_mes(dqm, q, qpd); if (retval) { - pr_err("%s: Failed to remove queue %d for dev %d", + dev_err(dev, "%s: Failed to remove queue %d for dev %d", __func__, q->properties.queue_id, dqm->dev->id); @@ -443,6 +444,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { + struct device *dev = dqm->dev->adev->dev; int allocated_vmid = -1, i; for (i = dqm->dev->vm_info.first_vmid_kfd; @@ -454,7 +456,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, } if (allocated_vmid < 0) { - pr_err("no more vmid to allocate\n"); + dev_err(dev, "no more vmid to allocate\n"); return -ENOSPC; } @@ -510,10 +512,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { + struct device *dev = dqm->dev->adev->dev; + /* On GFX v7, CP doesn't flush TC at dequeue */ if (q->device->adev->asic_type == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) - pr_err("Failed to flush TC\n"); + dev_err(dev, "Failed to flush TC\n"); kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); @@ -708,7 +712,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process pr_debug("Killing all process wavefronts\n"); if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { - pr_err("no vmid pasid mapping supported \n"); +
Re: [PATCH (set 1) 00/20] Rid W=1 warnings from GPU
On 8/24/23 14:07, Lee Jones wrote: > On Thu, 24 Aug 2023, Jani Nikula wrote: >> On Thu, 24 Aug 2023, Lee Jones wrote: >>> This set is part of a larger effort attempting to clean-up W=1 >>> kernel builds, which are currently overwhelmingly riddled with >>> niggly little warnings. >> >> The next question is, how do we keep it W=1 clean going forward? > > My plan was to fix them all, then move each warning to W=0. > > Arnd recently submitted a set doing just that for a bunch of them. > > https://lore.kernel.org/all/20230811140327.3754597-1-a...@kernel.org/ > > I like to think a bunch of this is built on top of my previous efforts. > > GPU is a particularly tricky though - the warnings seem to come in faster > than I can squash them. Maybe the maintainers can find a way to test > new patches on merge? One approach for this which has proved effective in Mesa and other projects is to make warnings fatal in CI which must pass for any changes to be merged. There is ongoing work toward introducing this for the DRM subsystem, using gitlab.freedesktop.org CI. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH v3 0/7] GPU workload hints for better performance
On Monday, August 28, 2023 09:26 -03, Arvind Yadav wrote: > AMDGPU SOCs supports dynamic workload based power profiles, which can > provide fine-tuned performance for a particular type of workload. > This patch series adds an interface to set/reset these power profiles > based on the submitted job. The driver can dynamically switch > the power profiles based on submitted job. This can optimize the power > performance when the particular workload is on. Hi Arvind, Would you mind testing your patchset with drm-ci? There is an amdgpu test there and I would love to get your feedback on the CI. You basically just need to apply the ci patch which is available on https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci There are instructions in the docs, but in short: to configure it, you push your branch to gitlab.freedesktop.org, go to the settings and change the CI/CD configuration file from .gitlab-ci.yml to drivers/gpu/drm/ci/gitlab-ci.yml, and you can trigger a pipeline on your branch to get tests running. (at the time of this writing, gitlab.fdo is under maintenance but should be up soonish). Thank you! Helen > > v2: > - Splitting workload_profile_set and workload_profile_put > into two separate patches. > - Addressed review comment. > - Added new suspend function. > - Added patch to switches the GPU workload mode for KFD. > > v3: > - Addressed all review comment. > - Changed the function name from *_set() to *_get(). > - Now clearing all the profile in work handler. > - Added *_clear_all function to clear all the power profile. > > > Arvind Yadav (7): > drm/amdgpu: Added init/fini functions for workload > drm/amdgpu: Add new function to set GPU power profile > drm/amdgpu: Add new function to put GPU power profile > drm/amdgpu: Add suspend function to clear the GPU power profile. 
> drm/amdgpu: Set/Reset GPU workload profile > drm/amdgpu: switch workload context to/from compute > Revert "drm/amd/amdgpu: switch on/off vcn power profile mode" > > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 8 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 226 ++ > drivers/gpu/drm/amd/include/amdgpu_workload.h | 61 + > 8 files changed, 309 insertions(+), 16 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c > create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h > > -- > 2.34.1 >
[PATCH] drm/amdkfd: use mask to get v9 interrupt sq data bits correctly
Interrupt sq data bits were not taken properly from contextid0 and contextid1. Use macro KFD_CONTEXT_ID_GET_SQ_INT_DATA instead. Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index f0731a6a5306..830396b1c3b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, default: break; } - kfd_signal_event_interrupt(pasid, context_id0 & 0xff, 24); + kfd_signal_event_interrupt(pasid, sq_int_data, 24); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), -- 2.32.0
Re: [PATCH] drm/amdkfd: ratelimited SQ interrupt messages
On 2023-08-10 12:13, Harish Kasiviswanathan wrote: No functional change. Use ratelimited version of pr_ to avoid overflowing of dmesg buffer Signed-off-by: Harish Kasiviswanathan Reviewed-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index c7991e07b6be..a7697ec8188e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -268,7 +268,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: -pr_debug( +pr_debug_ratelimited( "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1, SE_ID), @@ -284,7 +284,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: -pr_debug("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", +pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, @@ -310,7 +310,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0, ERR_TYPE); -pr_warn("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", +pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", 
REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f933bd231fb9..2a65792fd116 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -150,7 +150,7 @@ enum SQ_INTERRUPT_ERROR_TYPE { static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) { - pr_debug( + pr_debug_ratelimited( "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), @@ -165,7 +165,7 @@ static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) { - pr_debug( + pr_debug_ratelimited( "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), @@ -177,7 +177,7 @@ static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) { - pr_warn( + pr_warn_ratelimited( "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index f0731a6a5306..02695ccd22d6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -333,7 +333,7 @@ 
static void event_interrupt_wq_v9(struct kfd_node *dev, encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: -pr_debug( +pr_debug_ratelimited( "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), @@ -347,7 +347,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: -
Re: [PATCH v3 0/7] GPU workload hints for better performance
[AMD Official Use Only - General] As mentioned with an older version of this series, this is an 'abuse' of power profile interface. This series is oversimplifying what PMFW algorithms are supposed to be doing. Whatever this series is doing, FW can do it better. To explain in simpler terms - it just tries to boost a profile based on ring type without even knowing how much of activity a job can trigger on a particular ring. A job scheduled to a GFX ring doesn't deserve a profile boost unless it can create a certain level of activity. In CPU terms, a job scheduled to a processor doesn't mean it deserves a frequency boost of that CPU. At minimum it depends on more details like whether that job is compute bound or memory bound. While FW algorithms are designed to do that, this series tries to trivialise all such things. Unless you are able to show the tangible benefits in some terms like performance, power, or performance per watt, I don't think this should be the default behaviour where driver tries to override FW just based on job submissions to rings. Thanks, Lijo From: amd-gfx on behalf of Arvind Yadav Sent: Monday, August 28, 2023 5:56:07 PM To: Koenig, Christian ; Deucher, Alexander ; Sharma, Shashank ; Pan, Xinhui ; airl...@gmail.com ; dan...@ffwll.ch ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Cc: Yadav, Arvind ; linux-ker...@vger.kernel.org ; dri-de...@lists.freedesktop.org Subject: [PATCH v3 0/7] GPU workload hints for better performance AMDGPU SOCs supports dynamic workload based power profiles, which can provide fine-tuned performance for a particular type of workload. This patch series adds an interface to set/reset these power profiles based on the submitted job. The driver can dynamically switch the power profiles based on submitted job. This can optimize the power performance when the particular workload is on. v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. 
- Added new suspend function. - Added patch to switches the GPU workload mode for KFD. v3: - Addressed all review comment. - Changed the function name from *_set() to *_get(). - Now clearing all the profile in work handler. - Added *_clear_all function to clear all the power profile. Arvind Yadav (7): drm/amdgpu: Added init/fini functions for workload drm/amdgpu: Add new function to set GPU power profile drm/amdgpu: Add new function to put GPU power profile drm/amdgpu: Add suspend function to clear the GPU power profile. drm/amdgpu: Set/Reset GPU workload profile drm/amdgpu: switch workload context to/from compute Revert "drm/amd/amdgpu: switch on/off vcn power profile mode" drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 226 ++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 61 + 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h -- 2.34.1
RE: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11
[AMD Official Use Only - General] Reviewed-by: Harish Kasiviswanathan -Original Message- From: amd-gfx On Behalf Of David Francis Sent: Friday, August 25, 2023 3:14 PM To: amd-gfx@lists.freedesktop.org Cc: Francis, David ; Kuehling, Felix Subject: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11 The code in kfd_mqd_manager_v11.c to support criu dump and restore of queue state was missing. Added it; should be equivalent to kfd_mqd_manager_v10.c. CC: Felix Kuehling Signed-off-by: David Francis --- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 41 +++ 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 2319467d2d95..2a79d37da95d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, return 0; } +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) +{ + struct v11_compute_mqd *m; + + m = get_mqd(mqd); + + memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd)); +} + +static void restore_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, const u32 ctl_stack_size) +{ + uint64_t addr; + struct v11_compute_mqd *m; + + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; + addr = mqd_mem_obj->gpu_addr; + + memcpy(m, mqd_src, sizeof(*m)); + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + + m->cp_hqd_pq_doorbell_control = + qp->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + qp->is_active = 0; +} + + static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -457,6 
+494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v11_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; -- 2.34.1
Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane
On 08/28, Pekka Paalanen wrote: > On Mon, 28 Aug 2023 09:45:44 +0100 > Joshua Ashton wrote: > > > Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually > > just been applying it to every plane pre-blend. > > I've never seen that documented anywhere. > > It has seemed obvious, that since we have KMS objects for planes and > CRTCs, stuff on the CRTC does not do plane stuff before blending. That > also has not been documented in the past, but it seemed the most > logical choice. > > Even today > https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#color-management-properties > make no mention of whether they apply before or after blending. It's mentioned in the next section: https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#dc-color-capabilities-between-dcn-generations In hindsight, maybe it isn't the best place... > > > Degamma makes no sense after blending anyway. > > If the goal is to allow blending in optical or other space, you are > correct. However, APIs do not need to make sense to exist, like most of > the options of "Colorspace" connector property. > > I have always thought the CRTC DEGAMMA only exists to allow the CRTC > CTM to work in linear or other space. > > I have at times been puzzled by what the DEGAMMA and CTM are actually > good for. > > > The entire point is for it to happen before blending to blend in linear > > space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing... > > The CRTC CTM is between CRTC DEGAMMA and CRTC GAMMA, meaning they are > not interchangeable. > > I have literally believed that DRM KMS UAPI simply does not support > blending in optical space, unless your framebuffers are in optical > which no-one does, until the color management properties are added to > KMS planes. This never even seemed weird, because non-linear blending > is so common. > > So I have been misunderstanding the CRTC DEGAMMA property forever. Am I > the only one? 
Do all drivers agree today at what point does CRTC > DEGAMMA apply, before blending on all planes or after blending? > I'd like to know current userspace cases on Linux of this CRTC DEGAMMA LUT. > Does anyone know of any doc about that? From what I retrieved about the introduction of CRTC color props[1], it seems the main concern at that point was getting a linear space for CTM[2] and CRTC degamma property seems to have followed intel requirements, but didn't find anything about the blending space. AFAIU, we have just interpreted that all CRTC color properties for DRM interface are after blending[3]. Can this be seen in another way? [1] https://patchwork.freedesktop.org/series/2720/ [2] https://codereview.chromium.org/1182063002 [3] https://dri.freedesktop.org/docs/drm/_images/dcn3_cm_drm_current.svg > > If drivers do not agree on the behaviour of a KMS property, then that > property is useless for generic userspace. > > > Thanks, > pq > > > > On Tuesday, 22 August 2023, Pekka Paalanen > > wrote: > > > On Thu, 10 Aug 2023 15:02:59 -0100 > > > Melissa Wen wrote: > > > > > >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but > > >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC > > >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage > > >> regarging CRTC color properties to manage plane and CRTC color > > >> correction combinations. 
> > >> > > >> Reviewed-by: Harry Wentland > > >> Signed-off-by: Melissa Wen > > >> --- > > >> .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 59 +-- > > >> 1 file changed, 41 insertions(+), 18 deletions(-) > > >> > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > >> index 68e9f2c62f2e..74eb02655d96 100644 > > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > >> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct > > dm_crtc_state *crtc) > > >> return 0; > > >> } > > >> > > >> -/** > > >> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC > > plane. > > >> - * @crtc: amdgpu_dm crtc state > > >> - * @dc_plane_state: target DC surface > > >> - * > > >> - * Update the underlying dc_stream_state's input transfer function > > (ITF) in > > >> - * preparation for hardware commit. The transfer function used depends > > on > > >> - * the preparation done on the stream for color management. > > >> - * > > >> - * Returns: > > >> - * 0 on success. -ENOMEM if mem allocation fails. > > >> - */ > > >> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, > > >> - struct dc_plane_state > > *dc_plane_state) > > >> +static int > > >> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, > > >> + struct dc_plane_state *dc_plane_state) > > >> { > > >>
Re: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks
[AMD Official Use Only - General] Acked-by: Alex Deucher From: amd-gfx on behalf of Harish Kasiviswanathan Sent: Friday, August 25, 2023 5:30 PM To: amd-gfx@lists.freedesktop.org Cc: Cornwall, Jay ; Kasiviswanathan, Harish Subject: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks From: Jay Cornwall mqd_stride function was introduced in commit 129c7b6a0217 ("drm/amdkfd: Update MQD management on multi XCC setup") but not assigned for gfx11. Fixes a NULL dereference in debugfs. Signed-off-by: Jay Cornwall Signed-off-by: Harish Kasiviswanathan --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 2319467d2d95..0bbf0edbabd4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_hiq_mqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; mqd->mqd_size = sizeof(struct v11_sdma_mqd); + mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif -- 2.34.1
Re: [PATCH v2 31/34] drm/amd/display: set stream gamut remap matrix to MPC for DCN301
On Fri, 25 Aug 2023 13:37:08 -0100 Melissa Wen wrote: > On 08/22, Pekka Paalanen wrote: > > On Thu, 10 Aug 2023 15:03:11 -0100 > > Melissa Wen wrote: > > > > > dc->caps.color.mpc.gamut_remap says there is a post-blending color block > > > for gamut remap matrix for DCN3 HW family and newer versions. However, > > > those drivers still follow DCN10 programming that remap stream > > > gamut_remap_matrix to DPP (pre-blending). > > > > That's ok only as long as CRTC degamma is pass-through. Blending itself > > is a linear operation, so it doesn't matter if a matrix is applied to > > the blending result or to all blending inputs. But you cannot move a > > matrix operation to the other side of a non-linear operation, and you > > cannot move a non-linear operation across blending. > > Oh, I'm not moving it, what I'm doing here is the opposite and fixing > it. This patch puts each pre- and post-blending CTM in their right > place, since we have the HW caps for it on DCN3+... Or are you just > pointing out the implementation mistake on old driver versions? It's just the old mistake. I hope no-one complains, forcing you to revert this fix as a regression. Thanks, pq > > > To enable pre-blending and post-blending gamut_remap matrix supports at > > > the same time, set stream gamut_remap to MPC and plane gamut_remap to > > > DPP for DCN301 that support both. > > > > > > It was tested using IGT KMS color tests for DRM CRTC CTM property and it > > > preserves test results. > > > > > > Signed-off-by: Melissa Wen > > > --- > > > .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 +++ > > > .../drm/amd/display/dc/dcn30/dcn30_hwseq.h| 3 ++ > > > .../drm/amd/display/dc/dcn301/dcn301_init.c | 2 +- > > > 3 files changed, 41 insertions(+), 1 deletion(-) pgp46GyGJt3wv.pgp Description: OpenPGP digital signature
Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane
On Mon, 28 Aug 2023 09:45:44 +0100 Joshua Ashton wrote: > Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually > just been applying it to every plane pre-blend. I've never seen that documented anywhere. It has seemed obvious, that since we have KMS objects for planes and CRTCs, stuff on the CRTC does not do plane stuff before blending. That also has not been documented in the past, but it seemed the most logical choice. Even today https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#color-management-properties make no mention of whether they apply before or after blending. > Degamma makes no sense after blending anyway. If the goal is to allow blending in optical or other space, you are correct. However, APIs do not need to make sense to exist, like most of the options of "Colorspace" connector property. I have always thought the CRTC DEGAMMA only exists to allow the CRTC CTM to work in linear or other space. I have at times been puzzled by what the DEGAMMA and CTM are actually good for. > The entire point is for it to happen before blending to blend in linear > space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing... The CRTC CTM is between CRTC DEGAMMA and CRTC GAMMA, meaning they are not interchangeable. I have literally believed that DRM KMS UAPI simply does not support blending in optical space, unless your framebuffers are in optical which no-one does, until the color management properties are added to KMS planes. This never even seemed weird, because non-linear blending is so common. So I have been misunderstanding the CRTC DEGAMMA property forever. Am I the only one? Do all drivers agree today at what point does CRTC DEGAMMA apply, before blending on all planes or after blending? Does anyone know of any doc about that? If drivers do not agree on the behaviour of a KMS property, then that property is useless for generic userspace. 
Thanks, pq > On Tuesday, 22 August 2023, Pekka Paalanen > wrote: > > On Thu, 10 Aug 2023 15:02:59 -0100 > > Melissa Wen wrote: > > > >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but > >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC > >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage > >> regarging CRTC color properties to manage plane and CRTC color > >> correction combinations. > >> > >> Reviewed-by: Harry Wentland > >> Signed-off-by: Melissa Wen > >> --- > >> .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 59 +-- > >> 1 file changed, 41 insertions(+), 18 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > >> index 68e9f2c62f2e..74eb02655d96 100644 > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > >> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct > dm_crtc_state *crtc) > >> return 0; > >> } > >> > >> -/** > >> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC > plane. > >> - * @crtc: amdgpu_dm crtc state > >> - * @dc_plane_state: target DC surface > >> - * > >> - * Update the underlying dc_stream_state's input transfer function > (ITF) in > >> - * preparation for hardware commit. The transfer function used depends > on > >> - * the preparation done on the stream for color management. > >> - * > >> - * Returns: > >> - * 0 on success. -ENOMEM if mem allocation fails. 
> >> - */ > >> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, > >> - struct dc_plane_state > *dc_plane_state) > >> +static int > >> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, > >> + struct dc_plane_state *dc_plane_state) > >> { > >> const struct drm_color_lut *degamma_lut; > >> enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; > >> @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct > dm_crtc_state *crtc, > >>_size); > >> ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); > >> > >> - dc_plane_state->in_transfer_func->type = > >> - TF_TYPE_DISTRIBUTED_POINTS; > >> + dc_plane_state->in_transfer_func->type = > TF_TYPE_DISTRIBUTED_POINTS; > >> > >> /* > >>* This case isn't fully correct, but also fairly > >> @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct > dm_crtc_state *crtc, > >> degamma_lut, degamma_size); > >> if (r) > >> return r; > >> - } else if (crtc->cm_is_degamma_srgb) { > >> + } else { > >> /* > >>* For legacy gamma support we need the regamma input > >>* in linear space. Assume that the input is
Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane
On Fri, 25 Aug 2023 13:29:44 -0100 Melissa Wen wrote: > On 08/22, Pekka Paalanen wrote: > > On Thu, 10 Aug 2023 15:02:59 -0100 > > Melissa Wen wrote: > > > > > The next patch adds pre-blending degamma to AMD color mgmt pipeline, but > > > pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC > > > atomic degamma or implict degamma on legacy gamma. Detach degamma usage > > > regarging CRTC color properties to manage plane and CRTC color > > > correction combinations. > > > > > > Reviewed-by: Harry Wentland > > > Signed-off-by: Melissa Wen > > > --- > > > .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 59 +-- > > > 1 file changed, 41 insertions(+), 18 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > > index 68e9f2c62f2e..74eb02655d96 100644 > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c > > > @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct > > > dm_crtc_state *crtc) > > > return 0; > > > } > > > > > > -/** > > > - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC > > > plane. > > > - * @crtc: amdgpu_dm crtc state > > > - * @dc_plane_state: target DC surface > > > - * > > > - * Update the underlying dc_stream_state's input transfer function (ITF) > > > in > > > - * preparation for hardware commit. The transfer function used depends on > > > - * the preparation done on the stream for color management. > > > - * > > > - * Returns: > > > - * 0 on success. -ENOMEM if mem allocation fails. 
> > > - */ > > > -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, > > > - struct dc_plane_state *dc_plane_state) > > > +static int > > > +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, > > > + struct dc_plane_state *dc_plane_state) > > > { > > > const struct drm_color_lut *degamma_lut; > > > enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; > > > @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct > > > dm_crtc_state *crtc, > > >_size); > > > ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); > > > > > > - dc_plane_state->in_transfer_func->type = > > > - TF_TYPE_DISTRIBUTED_POINTS; > > > + dc_plane_state->in_transfer_func->type = > > > TF_TYPE_DISTRIBUTED_POINTS; > > > > > > /* > > >* This case isn't fully correct, but also fairly > > > @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct > > > dm_crtc_state *crtc, > > > degamma_lut, degamma_size); > > > if (r) > > > return r; > > > - } else if (crtc->cm_is_degamma_srgb) { > > > + } else { > > > /* > > >* For legacy gamma support we need the regamma input > > >* in linear space. Assume that the input is sRGB. > > > @@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct > > > dm_crtc_state *crtc, > > > > > > if (tf != TRANSFER_FUNCTION_SRGB && > > > !mod_color_calculate_degamma_params(NULL, > > > - dc_plane_state->in_transfer_func, NULL, false)) > > > + > > > dc_plane_state->in_transfer_func, > > > + NULL, false)) > > > return -ENOMEM; > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +/** > > > + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC > > > plane. > > > + * @crtc: amdgpu_dm crtc state > > > + * @dc_plane_state: target DC surface > > > + * > > > + * Update the underlying dc_stream_state's input transfer function (ITF) > > > in > > > + * preparation for hardware commit. The transfer function used depends on > > > + * the preparation done on the stream for color management. 
> > > + * > > > + * Returns: > > > + * 0 on success. -ENOMEM if mem allocation fails. > > > + */ > > > +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, > > > + struct dc_plane_state *dc_plane_state) > > > +{ > > > + bool has_crtc_cm_degamma; > > > + int ret; > > > + > > > + has_crtc_cm_degamma = (crtc->cm_has_degamma || > > > crtc->cm_is_degamma_srgb); > > > + if (has_crtc_cm_degamma){ > > > + /* AMD HW doesn't have post-blending degamma caps. When DRM > > > + * CRTC atomic degamma is set, we maps it to DPP degamma block > > > + * (pre-blending) or, on legacy gamma, we use DPP degamma to > > > + * linearize (implicit degamma) from sRGB/BT709 according to > > > + * the input space. > > > > Uhh, you can't just move degamma before blending if KMS userspace > > wants it after blending. That would be
RE: [PATCH 00/21] DC Patches August 23, 2023
[Public] Hi all, This week this patchset was tested on the following systems: * Lenovo ThinkBook T13s Gen4 with AMD Ryzen 5 6600U * MSI Gaming X Trio RX 6800 * Gigabyte Gaming OC RX 7900 XTX These systems were tested on the following display/connection types: * eDP, (1080p 60hz [5650U]) (1920x1200 60hz [6600U]) (2560x1600 120hz[6600U]) * VGA and DVI (1680x1050 60hz [DP to VGA/DVI, USB-C to VGA/DVI]) * DP/HDMI/USB-C (1440p 170hz, 4k 60hz, 4k 144hz, 4k 240hz [Includes USB-C to DP/HDMI adapters]) * Thunderbolt (LG Ultrafine 5k) * MST (Startech MST14DP123DP [DP to 3x DP] and 2x 4k 60Hz displays) * DSC (with Cable Matters 101075 [DP to 3x DP] with 3x 4k60 displays, and HP Hook G2 with 1 4k60 display) * USB 4 (Kensington SD5700T and 1x 4k 60Hz display) * PCON (Club3D CAC-1085 and 1x 4k 144Hz display [at 4k 120HZ, as that is the max the adapter supports]) The testing is a mix of automated and manual tests. Manual testing includes (but is not limited to): * Changing display configurations and settings * Benchmark testing * Feature testing (Freesync, etc.) Automated testing includes (but is not limited to): * Script testing (scripts to automate some of the manual checks) * IGT testing The patchset consists of the amd-staging-drm-next branch (Head commit - 1d35782cce09 drm/amdgpu: Fix the return for gpu mode1_reset) with new patches added on top of it. Tested on Ubuntu 22.04.3, on Wayland and X11, using Gnome for both. Tested-by: Daniel Wheeler Thank you, Dan Wheeler Sr. 
Technologist | AMD SW Display -- 1 Commerce Valley Dr E, Thornhill, ON L3T 7X6 amd.com -Original Message- From: Mahfooz, Hamza Sent: Wednesday, August 23, 2023 11:58 AM To: amd-gfx@lists.freedesktop.org Cc: Wentland, Harry ; Li, Sun peng (Leo) ; Lakha, Bhawanpreet ; Siqueira, Rodrigo ; Pillai, Aurabindo ; Zhuo, Qingqing (Lillian) ; Li, Roman ; Lin, Wayne ; Wang, Chao-kai (Stylon) ; Chiu, Solomon ; Kotarac, Pavle ; Gutierrez, Agustin ; Zuo, Jerry ; Mahfooz, Hamza ; Wheeler, Daniel Subject: [PATCH 00/21] DC Patches August 23, 2023 This DC patch-set brings improvements in multiple areas. In summary, we highlight: * DCN315 fixes * DCN31 fixes * DPIA fixes * Dump the pipe topology when it updates * Misc code cleanups * New debugfs interface to query the current ODM combine configuration * ODM fixes * Potential deadlock while waiting for MPC idle fix * Support for windowed MPO ODM Cc: Daniel Wheeler Aurabindo Pillai (2): drm/amd/display: Fix incorrect comment drm/amd/display: Add debugfs interface for ODM combine info Charlene Liu (1): drm/amd/display: correct z8_watermark 16bit to 20bit mask Dillon Varone (1): drm/amd/display: Skip dmub memory flush when not needed Ethan Bitnun (1): drm/amd/display: Add support for 1080p SubVP to reduce idle power Fudong Wang (1): drm/amd/display: Add smu write msg id fail retry process Gabe Teeger (1): drm/amd/display: Remove wait while locked Martin Leung (1): drm/amd/display: 3.2.249 Mustapha Ghaddar (1): drm/amd/display: Add DPIA Link Encoder Assignment Fix Wenjing Liu (12): Partially revert "drm/amd/display: update add plane to context logic with a new algorithm" drm/amd/display: update blank state on ODM changes drm/amd/display: always switch off ODM before committing more streams drm/amd/display: add comments to add plane functions drm/amd/display: rename function to add otg master for stream drm/amd/display: add new resource interface for acquiring sec opp heads and release pipe drm/amd/display: add new resource interfaces to 
update odm mpc slice count drm/amd/display: add more pipe resource interfaces drm/amd/display: use new pipe allocation interface in dcn32 fpu drm/amd/display: switch to new ODM policy for windowed MPO ODM support drm/amd/display: add pipe topology update log drm/amd/display: fix pipe topology logging error .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 36 +- drivers/gpu/drm/amd/display/dc/Makefile |1 + .../display/dc/clk_mgr/dcn315/dcn315_smu.c| 20 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 68 +- .../drm/amd/display/dc/core/dc_link_enc_cfg.c | 35 +- .../gpu/drm/amd/display/dc/core/dc_resource.c | 1473 + .../gpu/drm/amd/display/dc/core/dc_stream.c |2 +- drivers/gpu/drm/amd/display/dc/dc.h |3 +- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c| 59 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 22 +- .../drm/amd/display/dc/dcn20/dcn20_resource.h |4 +- .../amd/display/dc/dcn201/dcn201_resource.c |1 + .../drm/amd/display/dc/dcn21/dcn21_resource.c |1 + .../drm/amd/display/dc/dcn30/dcn30_resource.c |1 + .../amd/display/dc/dcn301/dcn301_resource.c |
[PATCH v3 7/7] Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"
This reverts commit 5ce71f59bb9bd3d8a09b96afdbc92975cb6dc303. Reason for revert: New amdgpu_workload_profile* api is added to switch on/off profile mode. These new api will allow to change the GPU power profile based on a submitted job. Cc: Christian Koenig Cc: Alex Deucher Acked-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 ++ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2d94f1b63bd6..70777fcfa626 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -363,7 +363,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, vcn.idle_work.work); unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i, j; - int r = 0; for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) @@ -392,10 +391,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(>vcn.total_submission_cnt)) { amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); } else { schedule_delayed_work(>vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -404,16 +399,11 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - int r = 0; atomic_inc(>vcn.total_submission_cnt); - if (!cancel_delayed_work_sync(>vcn.idle_work)) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); - if (r) - dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); - } + if (!cancel_delayed_work_sync(>vcn.idle_work)) + 
amdgpu_gfx_off_ctrl(adev, false); mutex_lock(>vcn.vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, -- 2.34.1
[PATCH v3 6/7] drm/amdgpu: switch workload context to/from compute
This patch switches the GPU workload mode to/from compute mode, while submitting compute workload. v3: - Addressed the review comment about changing the function name from *_set() to *_get(). Cc: Christian Koenig Signed-off-by: Alex Deucher Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0385f7f69278..fa939ac17120 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -713,9 +713,11 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + + if (idle) + amdgpu_workload_profile_put(adev, AMDGPU_RING_TYPE_COMPUTE); + else + amdgpu_workload_profile_get(adev, AMDGPU_RING_TYPE_COMPUTE); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) -- 2.34.1
[PATCH v3 5/7] drm/amdgpu: Set/Reset GPU workload profile
This patch is to switch the GPU workload profile based on the submitted job. The workload profile is reset to default when the job is done. v3: - Addressed the review comment about changing the function name from *_set() to *_get(). Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c3d9d75143f4..c5032762d497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -176,6 +176,9 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) static void amdgpu_job_free_cb(struct drm_sched_job *s_job) { struct amdgpu_job *job = to_amdgpu_job(s_job); + struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); + + amdgpu_workload_profile_put(ring->adev, ring->funcs->type); drm_sched_job_cleanup(s_job); @@ -295,6 +298,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) DRM_ERROR("Error scheduling IBs (%d)\n", r); } + amdgpu_workload_profile_get(adev, ring->funcs->type); + job->job_run_counter++; amdgpu_job_free_resources(job); -- 2.34.1
[PATCH v3 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.
This patch adds a suspend function that will clear the GPU power profile before going into suspend state. v2: - Add the new suspend function based on review comment. v3: - Adressed the review comment. - Now clearing all the profile in work handler. Cc: Shashank Sharma Cc: Christian Koenig Cc: Alex Deucher Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 7 +++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 2 ++ 3 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cd3bf641b630..3b70e657b439 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); + amdgpu_workload_profile_suspend(adev); + amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c index fbe86ee5b8bf..0bab274a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c @@ -197,6 +197,13 @@ void amdgpu_workload_profile_get(struct amdgpu_device *adev, mutex_unlock(>workload_lock); } +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_smu_workload *workload = >smu_workload; + + amdgpu_power_profile_clear_all(adev, workload); +} + void amdgpu_workload_profile_init(struct amdgpu_device *adev) { adev->smu_workload.adev = adev; diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h b/drivers/gpu/drm/amd/include/amdgpu_workload.h index 596a962800e9..34b96a5e7b50 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_workload.h +++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h @@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct amdgpu_device *adev, void amdgpu_workload_profile_get(struct amdgpu_device 
*adev, uint32_t ring_type); +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev); + void amdgpu_workload_profile_init(struct amdgpu_device *adev); void amdgpu_workload_profile_fini(struct amdgpu_device *adev); -- 2.34.1
[PATCH v3 3/7] drm/amdgpu: Add new function to put GPU power profile
This patch adds a function which will clear the GPU power profile after job finished. This is how it works: - scheduler will set the GPU power profile based on ring_type. - Scheduler will clear the GPU Power profile once job finished. - Here, the *_workload_profile_set function will set the GPU power profile and the *_workload_profile_put function will schedule the smu_delayed_work task after 100ms delay. This smu_delayed_work task will clear a GPU power profile if any new jobs are not scheduled within 100 ms. But if any new job comes within 100ms then the *_workload_profile_set function will cancel this work and set the GPU power profile based on preferences. v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. v3: - Addressed all the review comments. - Now clearing all the profile in work handler. - Added *_clear_all function to clear all the power profile. - scheduling delay work to clear the power profile when refcount becomes zero. 
Cc: Shashank Sharma Cc: Christian Koenig Cc: Alex Deucher Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 118 +- drivers/gpu/drm/amd/include/amdgpu_workload.h | 3 + 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c index 67eacaac6c9b..fbe86ee5b8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c @@ -24,6 +24,9 @@ #include "amdgpu.h" +/* 100 millsecond timeout */ +#define SMU_IDLE_TIMEOUT msecs_to_jiffies(100) + static enum PP_SMC_POWER_PROFILE ring_to_power_profile(uint32_t ring_type) { @@ -58,16 +61,111 @@ amdgpu_power_profile_set(struct amdgpu_device *adev, return ret; } +static int +amdgpu_power_profile_clear(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE profile) +{ + int ret = amdgpu_dpm_switch_power_profile(adev, profile, false); + + if (!ret) { + /* Clear the bit for the submitted workload profile */ + clear_bit(profile, >smu_workload.submit_workload_status); + } + + return ret; +} + +static void +amdgpu_power_profile_clear_all(struct amdgpu_device *adev, + struct amdgpu_smu_workload *workload) +{ + int ret; + int profile = PP_SMC_POWER_PROFILE_COMPUTE; + + cancel_delayed_work_sync(>power_profile_work); + mutex_lock(>workload_lock); + + /* Clear all the GPU power profile*/ + for (; profile > 0; profile--) { + atomic_set(>power_profile_ref[profile], 0); + ret = amdgpu_power_profile_clear(adev, profile); + if (ret) { + DRM_WARN("Failed to clear workload %s,error = %d\n", +amdgpu_workload_mode_name[profile], ret); + } + } + + workload->submit_workload_status = 0; + mutex_unlock(>workload_lock); +} + +static void +amdgpu_power_profile_idle_work_handler(struct work_struct *work) +{ + + struct amdgpu_smu_workload *workload = container_of(work, + struct amdgpu_smu_workload, + power_profile_work.work); + struct amdgpu_device *adev = workload->adev; + int ret; + int 
profile; + + mutex_lock(>workload_lock); + + /* Clear all the GPU power profile*/ + for_each_set_bit(profile, >submit_workload_status, +PP_SMC_POWER_PROFILE_CUSTOM) { + if (!atomic_read(>power_profile_ref[profile])) { + ret = amdgpu_power_profile_clear(adev, profile); + if (ret) { + DRM_WARN("Failed to clear workload %s,error = %d\n", +amdgpu_workload_mode_name[profile], ret); + } + } + } + + mutex_unlock(>workload_lock); +} + +void amdgpu_workload_profile_put(struct amdgpu_device *adev, +uint32_t ring_type) +{ + struct amdgpu_smu_workload *workload = >smu_workload; + enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type); + int refcount; + + if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT) + return; + + mutex_lock(>workload_lock); + + refcount = atomic_read(>power_profile_ref[profile]); + if (!refcount) { + DRM_WARN("Power profile %s ref. count error\n", +amdgpu_workload_mode_name[profile]); + } else { + if (refcount == 1) + schedule_delayed_work(>power_profile_work, + SMU_IDLE_TIMEOUT); + + atomic_dec(>power_profile_ref[profile]);
[PATCH v3 2/7] drm/amdgpu: Add new function to set GPU power profile
This patch adds a function which will change the GPU power profile based on a submitted job. This can optimize the power performance when the workload is on. v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. v3: - Adressed all the review comment. - Changing the function name from *_set() to *_get(). - Now setting a power profile when refcount is zero. Cc: Shashank Sharma Cc: Christian Koenig Cc: Alex Deucher Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 59 +++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 3 + 2 files changed, 62 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c index 32166f482f77..67eacaac6c9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c @@ -24,6 +24,65 @@ #include "amdgpu.h" +static enum PP_SMC_POWER_PROFILE +ring_to_power_profile(uint32_t ring_type) +{ + switch (ring_type) { + case AMDGPU_RING_TYPE_GFX: + return PP_SMC_POWER_PROFILE_FULLSCREEN3D; + case AMDGPU_RING_TYPE_COMPUTE: + return PP_SMC_POWER_PROFILE_COMPUTE; + case AMDGPU_RING_TYPE_UVD: + case AMDGPU_RING_TYPE_VCE: + case AMDGPU_RING_TYPE_UVD_ENC: + case AMDGPU_RING_TYPE_VCN_DEC: + case AMDGPU_RING_TYPE_VCN_ENC: + case AMDGPU_RING_TYPE_VCN_JPEG: + return PP_SMC_POWER_PROFILE_VIDEO; + default: + return PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + } +} + +static int +amdgpu_power_profile_set(struct amdgpu_device *adev, +enum PP_SMC_POWER_PROFILE profile) +{ + int ret = amdgpu_dpm_switch_power_profile(adev, profile, true); + + if (!ret) { + /* Set the bit for the submitted workload profile */ + set_bit(profile, >smu_workload.submit_workload_status); + } + + return ret; +} + +void amdgpu_workload_profile_get(struct amdgpu_device *adev, +uint32_t ring_type) +{ + struct amdgpu_smu_workload *workload = >smu_workload; + enum PP_SMC_POWER_PROFILE profile = 
ring_to_power_profile(ring_type); + int ret, refcount; + + if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT) + return; + + mutex_lock(>workload_lock); + + refcount = atomic_read(>power_profile_ref[profile]); + if (!refcount) { + ret = amdgpu_power_profile_set(adev, profile); + if (ret) { + DRM_WARN("Failed to set workload profile to %s, error = %d\n", +amdgpu_workload_mode_name[profile], ret); + } + } + + atomic_inc(>smu_workload.power_profile_ref[profile]); + mutex_unlock(>workload_lock); +} + void amdgpu_workload_profile_init(struct amdgpu_device *adev) { adev->smu_workload.adev = adev; diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h b/drivers/gpu/drm/amd/include/amdgpu_workload.h index dc12448764a3..5fc0bc2a74a4 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_workload.h +++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h @@ -46,6 +46,9 @@ static const char * const amdgpu_workload_mode_name[] = { "Window3D" }; +void amdgpu_workload_profile_get(struct amdgpu_device *adev, +uint32_t ring_type); + void amdgpu_workload_profile_init(struct amdgpu_device *adev); void amdgpu_workload_profile_fini(struct amdgpu_device *adev); -- 2.34.1
[PATCH v3 1/7] drm/amdgpu: Added init/fini functions for workload
The'struct amdgpu_smu_workload' initialization/cleanup functions is added by this patch. v2: - Splitting big patch into separate patches. - Added new fini function. v3: - Addressed review comment to change 'power_profile_work' instead of 'smu_delayed_work'. Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 44 +++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 53 +++ 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 415a7fa395c4..6a9e187d61e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -60,7 +60,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o + amdgpu_ring_mux.o amdgpu_workload.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e39..1939fa1af8a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,6 +107,7 @@ #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" #include "amdgpu_ras.h" +#include "amdgpu_workload.h" #define MAX_GPU_INSTANCE 16 @@ -1050,6 +1051,8 @@ struct amdgpu_device { booljob_hang; booldc_enabled; + + struct amdgpu_smu_workload smu_workload; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 
5c7d40873ee2..cd3bf641b630 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2243,6 +2243,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; + amdgpu_workload_profile_init(adev); + return 0; } @@ -2890,6 +2892,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_workload_profile_fini(adev); + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c new file mode 100644 index ..32166f482f77 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" + +void amdgpu_workload_profile_init(struct amdgpu_device *adev) +{ + adev->smu_workload.adev = adev; + adev->smu_workload.submit_workload_status = 0; + adev->smu_workload.initialized = true; + + mutex_init(>smu_workload.workload_lock); +} + +void amdgpu_workload_profile_fini(struct amdgpu_device *adev) +{ + if (!adev->smu_workload.initialized) + return; + + adev->smu_workload.submit_workload_status = 0; + adev->smu_workload.initialized = false; + mutex_destroy(>smu_workload.workload_lock); +} diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h
[PATCH v3 0/7] GPU workload hints for better performance
AMDGPU SoCs support dynamic workload based power profiles, which can provide fine-tuned performance for a particular type of workload. This patch series adds an interface to set/reset these power profiles based on the submitted job. The driver can dynamically switch the power profiles based on submitted job. This can optimize the power performance when the particular workload is on. v2: - Splitting workload_profile_set and workload_profile_put into two separate patches. - Addressed review comment. - Added new suspend function. - Added patch to switch the GPU workload mode for KFD. v3: - Addressed all review comments. - Changed the function name from *_set() to *_get(). - Now clearing all the profile in work handler. - Added *_clear_all function to clear all the power profile. Arvind Yadav (7): drm/amdgpu: Added init/fini functions for workload drm/amdgpu: Add new function to set GPU power profile drm/amdgpu: Add new function to put GPU power profile drm/amdgpu: Add suspend function to clear the GPU power profile. drm/amdgpu: Set/Reset GPU workload profile drm/amdgpu: switch workload context to/from compute Revert "drm/amd/amdgpu: switch on/off vcn power profile mode" drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c | 226 ++ drivers/gpu/drm/amd/include/amdgpu_workload.h | 61 + 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h -- 2.34.1
Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs
Well, there seems to be a very basic misunderstanding here: The IOMMU isolation level is *not* ASIC dependent! Try to set amd_iommu=force_isolation on the kernel command line. This is a configuration option customers can use to harden their systems and when this isn't properly tested we can't allow page tables in system memory. Regards, Christian. Am 28.08.23 um 13:23 schrieb Zhang, Yifan: [Public] Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU is pass through in these ASIC. -Original Message- From: Christian König Sent: Monday, August 28, 2023 5:41 PM To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs Is that now validated with IOMMU in non pass through mode? Christian. Am 28.08.23 um 10:58 schrieb Zhang, Yifan: [AMD Official Use Only - General] Ping -Original Message- From: Zhang, Yifan Sent: Friday, August 25, 2023 8:34 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian ; Zhang, Yifan Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and newer APUs. 
v2: only enable it for gfx10 and newer APUs (Alex, Christian) Signed-off-by: Yifan Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 96d601e209b8..4603d87c61a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - if (!adev->gmc.is_app_apu) - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - else + if (adev->gmc.is_app_apu || + ((adev->flags & AMD_IS_APU) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3 bp.domain = AMDGPU_GEM_DOMAIN_GTT; + else + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -- 2.37.3
RE: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs
[Public] Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU is pass through in these ASIC. -Original Message- From: Christian König Sent: Monday, August 28, 2023 5:41 PM To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs Is that now validated with IOMMU in non pass through mode? Christian. Am 28.08.23 um 10:58 schrieb Zhang, Yifan: > [AMD Official Use Only - General] > > Ping > > -Original Message- > From: Zhang, Yifan > Sent: Friday, August 25, 2023 8:34 AM > To: amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Koenig, Christian > ; Zhang, Yifan > Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 > onwards APUs > > To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 > and newer APUs. > > v2: only enable it for gfx10 and newer APUs (Alex, Christian) > > Signed-off-by: Yifan Zhang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++--- > 1 file changed, 6 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > index 96d601e209b8..4603d87c61a0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, > struct amdgpu_vm *vm, > bp.size = amdgpu_vm_pt_size(adev, level); > bp.byte_align = AMDGPU_GPU_PAGE_SIZE; > > - if (!adev->gmc.is_app_apu) > - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; > - else > + if (adev->gmc.is_app_apu || > + ((adev->flags & AMD_IS_APU) && > + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3 > bp.domain = AMDGPU_GEM_DOMAIN_GTT; > + else > + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; > + > > bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); > bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | > -- > 2.37.3 >
Re: [PATCH v2] drm/amd: Simplify the bo size check function
Am 28.08.23 um 12:02 schrieb Ma Jun: Simplify the code logic of size check function amdgpu_bo_validate_size Signed-off-by: Ma Jun Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 +- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 807ea74ece25..e603ca062fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -/* Validate bo size is bit bigger then the request domain */ +/* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, unsigned long size, u32 domain) { @@ -490,29 +490,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, * If GTT is part of requested domains the check must succeed to * allow fall back to GTT. */ - if (domain & AMDGPU_GEM_DOMAIN_GTT) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) man = ttm_manager_type(>mman.bdev, TTM_PL_TT); - - if (man && size < man->size) - return true; - else if (!man) - WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); - goto fail; - } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (domain & AMDGPU_GEM_DOMAIN_VRAM) man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM); + else + return true; - if (man && size < man->size) - return true; - goto fail; + if (!man) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + return false; } /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */ - return true; + if (size < man->size) + return true; -fail: - if (man) - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size); return false; }
[PATCH v2] drm/amd: Simplify the bo size check function
Simplify the code logic of size check function amdgpu_bo_validate_size Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 +- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 807ea74ece25..e603ca062fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -/* Validate bo size is bit bigger then the request domain */ +/* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, unsigned long size, u32 domain) { @@ -490,29 +490,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, * If GTT is part of requested domains the check must succeed to * allow fall back to GTT. */ - if (domain & AMDGPU_GEM_DOMAIN_GTT) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) man = ttm_manager_type(>mman.bdev, TTM_PL_TT); - - if (man && size < man->size) - return true; - else if (!man) - WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); - goto fail; - } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (domain & AMDGPU_GEM_DOMAIN_VRAM) man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM); + else + return true; - if (man && size < man->size) - return true; - goto fail; + if (!man) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + return false; } /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */ - return true; + if (size < man->size) + return true; -fail: - if (man) - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size); return false; } -- 2.34.1
Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs
Is that now validated with IOMMU in non pass through mode? Christian. Am 28.08.23 um 10:58 schrieb Zhang, Yifan: [AMD Official Use Only - General] Ping -Original Message- From: Zhang, Yifan Sent: Friday, August 25, 2023 8:34 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian ; Zhang, Yifan Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and newer APUs. v2: only enable it for gfx10 and newer APUs (Alex, Christian) Signed-off-by: Yifan Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 96d601e209b8..4603d87c61a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - if (!adev->gmc.is_app_apu) - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - else + if (adev->gmc.is_app_apu || + ((adev->flags & AMD_IS_APU) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3 bp.domain = AMDGPU_GEM_DOMAIN_GTT; + else + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -- 2.37.3
[PATCH v2] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV runtime
Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can directly access it through MMIO during SRIOV runtime. v2: use SOC15 interface to access registers Signed-off-by: ZhenGuo Yin --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 13 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 13 +++-- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0aee9c8288a2..6ccde07ed63e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7897,22 +7897,15 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid) { - u32 reg, data; + u32 data; /* not for *_SOC15 */ - reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); + data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); } static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b0c32521efdc..337ed771605f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4984,23 +4984,16 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { - u32 reg, data; + u32 data; amdgpu_gfx_off_ctrl(adev, false); - reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); - if 
(amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32(reg); + data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); } -- 2.35.1
RE: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs
[AMD Official Use Only - General] Ping -Original Message- From: Zhang, Yifan Sent: Friday, August 25, 2023 8:34 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian ; Zhang, Yifan Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and newer APUs. v2: only enable it for gfx10 and newer APUs (Alex, Christian) Signed-off-by: Yifan Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 96d601e209b8..4603d87c61a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - if (!adev->gmc.is_app_apu) - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - else + if (adev->gmc.is_app_apu || + ((adev->flags & AMD_IS_APU) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3 bp.domain = AMDGPU_GEM_DOMAIN_GTT; + else + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -- 2.37.3
Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane
Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually just been applying it to every plane pre-blend. Degamma makes no sense after blending anyway. The entire point is for it to happen before blending to blend in linear space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing... - Joshie On Tuesday, 22 August 2023, Pekka Paalanen wrote: > On Thu, 10 Aug 2023 15:02:59 -0100 > Melissa Wen wrote: > >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage >> regarging CRTC color properties to manage plane and CRTC color >> correction combinations. >> >> Reviewed-by: Harry Wentland >> Signed-off-by: Melissa Wen >> --- >> .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 59 +-- >> 1 file changed, 41 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c >> index 68e9f2c62f2e..74eb02655d96 100644 >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c >> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) >> return 0; >> } >> >> -/** >> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. >> - * @crtc: amdgpu_dm crtc state >> - * @dc_plane_state: target DC surface >> - * >> - * Update the underlying dc_stream_state's input transfer function (ITF) in >> - * preparation for hardware commit. The transfer function used depends on >> - * the preparation done on the stream for color management. >> - * >> - * Returns: >> - * 0 on success. -ENOMEM if mem allocation fails. 
>> - */ >> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, >> - struct dc_plane_state *dc_plane_state) >> +static int >> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, >> + struct dc_plane_state *dc_plane_state) >> { >> const struct drm_color_lut *degamma_lut; >> enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; >> @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, >>_size); >> ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); >> >> - dc_plane_state->in_transfer_func->type = >> - TF_TYPE_DISTRIBUTED_POINTS; >> + dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; >> >> /* >>* This case isn't fully correct, but also fairly >> @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, >> degamma_lut, degamma_size); >> if (r) >> return r; >> - } else if (crtc->cm_is_degamma_srgb) { >> + } else { >> /* >>* For legacy gamma support we need the regamma input >>* in linear space. Assume that the input is sRGB. >> @@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, >> >> if (tf != TRANSFER_FUNCTION_SRGB && >> !mod_color_calculate_degamma_params(NULL, >> - dc_plane_state->in_transfer_func, NULL, false)) >> + dc_plane_state->in_transfer_func, >> + NULL, false)) >> return -ENOMEM; >> + } >> + >> + return 0; >> +} >> + >> +/** >> + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. >> + * @crtc: amdgpu_dm crtc state >> + * @dc_plane_state: target DC surface >> + * >> + * Update the underlying dc_stream_state's input transfer function (ITF) in >> + * preparation for hardware commit. The transfer function used depends on >> + * the preparation done on the stream for color management. >> + * >> + * Returns: >> + * 0 on success. -ENOMEM if mem allocation fails. 
>> + */ >> +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, >> + struct dc_plane_state *dc_plane_state) >> +{ >> + bool has_crtc_cm_degamma; >> + int ret; >> + >> + has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); >> + if (has_crtc_cm_degamma){ >> + /* AMD HW doesn't have post-blending degamma caps. When DRM >> + * CRTC atomic degamma is set, we maps it to DPP degamma block >> + * (pre-blending) or, on legacy gamma, we use DPP degamma to >> + * linearize (implicit degamma) from sRGB/BT709 according to >> + * the input space. > > Uhh, you can't just move degamma before blending if KMS userspace > wants it after blending. That would be incorrect behaviour.
Re: [PATCH] drm/amdgpu: remove unused parameter in amdgpu_vmid_grab_idle
Am 17.08.23 um 09:00 schrieb Yifan Zhang: amdgpu_vm is not used in amdgpu_vmid_grab_idle. Signed-off-by: Yifan Zhang Sorry for the delay, Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ff1ea99292fb..ddd0891da116 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -188,7 +188,6 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, /** * amdgpu_vmid_grab_idle - grab idle VMID * - * @vm: vm to allocate id for * @ring: ring we want to submit job to * @idle: resulting idle VMID * @fence: fence to wait for if no id could be grabbed @@ -196,8 +195,7 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, * Try to find an idle VMID, if none is idle add a fence to wait to the sync * object. Returns -ENOMEM when we are out of memory. */ -static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, -struct amdgpu_ring *ring, +static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring, struct amdgpu_vmid **idle, struct dma_fence **fence) { @@ -405,7 +403,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int r = 0; mutex_lock(_mgr->lock); - r = amdgpu_vmid_grab_idle(vm, ring, , fence); + r = amdgpu_vmid_grab_idle(ring, , fence); if (r || !idle) goto error;
Re: [PATCH] drm/amd: Simplify the size check function
On 8/28/2023 2:00 PM, Christian König wrote: > Am 28.08.23 um 07:09 schrieb Ma, Jun: >> Hi Christian, >> >> On 8/25/2023 4:08 PM, Christian König wrote: >>> >>> Am 25.08.23 um 07:22 schrieb Ma Jun: Simplify the code logic of size check function amdgpu_bo_validate_size Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 +- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 807ea74ece25..4c95db954a76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -/* Validate bo size is bit bigger then the request domain */ +/* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, unsigned long size, u32 domain) { @@ -490,29 +490,23 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, * If GTT is part of requested domains the check must succeed to * allow fall back to GTT. */ - if (domain & AMDGPU_GEM_DOMAIN_GTT) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) man = ttm_manager_type(>mman.bdev, TTM_PL_TT); - - if (man && size < man->size) - return true; - else if (!man) - WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); - goto fail; - } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (domain & AMDGPU_GEM_DOMAIN_VRAM) man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM); + else + return true; - if (man && size < man->size) - return true; - goto fail; + if (!man) { + WARN_ON_ONCE("Mem mananger of mem domain %d is uninitialized", domain); + return false; } >>> That change here is not correct. It's perfectly valid for userspace to >>> request VRAM even if VRAM isn't initialized. >>> >>> Only the GTT manager is mandatory. That's why the code previously looked >>> like it does. 
>>> >> Thanks for your explanation. >> How about changing the code to the following? >> >> + if (!man) { >> + if (domain & AMDGPU_GEM_DOMAIN_GTT) >> + WARN_ON_ONCE("GTT domain requested but GTT mem >> manager uninitialized"); >> + return false; >> } > > Of hand that looks like it should work, but I need to see the full patch. > Thanks, I'll send v2 with this change. Regards, Ma Jun > Regards, > Christian. > >> >> Regards, >> Ma Jun >> >>> regards, >>> Christian. >>> /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */ - return true; + if (size < man->size) + return true; -fail: - if (man) - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, -man->size); + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size); return false; } >
[PATCH] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV
Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can directly access it through MMIO. Signed-off-by: ZhenGuo Yin --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 10 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0aee9c8288a2..65619f73f717 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7901,18 +7901,12 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, /* not for *_SOC15 */ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); + data = RREG32_NO_KIQ(reg); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); } static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b0c32521efdc..7f8c5c6fd36e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4989,18 +4989,12 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) amdgpu_gfx_off_ctrl(adev, false); reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32(reg); + data = RREG32_NO_KIQ(reg); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, 
regRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); } -- 2.35.1
Re: [PATCH] drm/amd: Simplify the size check function
Am 28.08.23 um 07:09 schrieb Ma, Jun: Hi Christian, On 8/25/2023 4:08 PM, Christian König wrote: Am 25.08.23 um 07:22 schrieb Ma Jun: Simplify the code logic of size check function amdgpu_bo_validate_size Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 +- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 807ea74ece25..4c95db954a76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -/* Validate bo size is bit bigger then the request domain */ +/* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, unsigned long size, u32 domain) { @@ -490,29 +490,23 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, * If GTT is part of requested domains the check must succeed to * allow fall back to GTT. */ - if (domain & AMDGPU_GEM_DOMAIN_GTT) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) man = ttm_manager_type(>mman.bdev, TTM_PL_TT); - - if (man && size < man->size) - return true; - else if (!man) - WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); - goto fail; - } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (domain & AMDGPU_GEM_DOMAIN_VRAM) man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM); + else + return true; - if (man && size < man->size) - return true; - goto fail; + if (!man) { + WARN_ON_ONCE("Mem mananger of mem domain %d is uninitialized", domain); + return false; } That change here is not correct. It's perfectly valid for userspace to request VRAM even if VRAM isn't initialized. Only the GTT manager is mandatory. That's why the code previously looked like it does. Thanks for your explanation. How about changing the code to the following? 
+ if (!man) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + return false; } Of hand that looks like it should work, but I need to see the full patch. Regards, Christian. Regards, Ma Jun regards, Christian. /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */ - return true; + if (size < man->size) + return true; -fail: - if (man) - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size); return false; }