Re: [PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF
On 2019-11-05 6:06 p.m., Alex Deucher wrote: > On Tue, Nov 5, 2019 at 5:26 PM Huang, JinHuiEric > wrote: >> Using unified VBIOS has performance drop in sriov environment. >> The fix is switching to another register instead. >> >> Signed-off-by: Eric Huang >> --- >> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 --- >> 1 file changed, 16 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> index 829d623..e44a3ea 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> @@ -3885,9 +3885,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct >> amdgpu_device *adev) >> uint64_t clock; >> >> mutex_lock(&adev->gfx.gpu_clock_mutex); >> - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); >> - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | >> - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << >> 32ULL); >> + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { >> + uint32_t tmp, lsb, msb, i = 0; >> + do { >> + if (i != 0) >> + udelay(1); >> + tmp = RREG32_SOC15(GC, 0, >> mmRLC_REFCLOCK_TIMESTAMP_MSB); >> + lsb = RREG32_SOC15(GC, 0, >> mmRLC_REFCLOCK_TIMESTAMP_LSB); >> + msb = RREG32_SOC15(GC, 0, >> mmRLC_REFCLOCK_TIMESTAMP_MSB); >> + i++; >> + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); >> + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); >> + } else { >> + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); >> + clock = (uint64_t)RREG32_SOC15(GC, 0, >> mmRLC_GPU_CLOCK_COUNT_LSB) | >> + ((uint64_t)RREG32_SOC15(GC, 0, >> mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); >> + } > Is there a reason we can't use the same regs on bare metal and SR-IOV? > I'd like to minimize the deltas if possible. As Jerry's request, this change will avoid P1 policy protection on RLC_GPU_CLOCK_COUNT_LSB/MSB. 
Eric > > Alex > >> mutex_unlock(&adev->gfx.gpu_clock_mutex); >> return clock; >> } >> -- >> 2.7.4 >> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF
Using unified VBIOS has performance drop in sriov environment. The fix is switching to another register instead. Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 829d623..e44a3ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3885,9 +3885,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF
Using unified VBIOS has performance drop in sriov environment. The fix is switching to another register instead. Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 829d623..6770bd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3885,9 +3885,21 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + udelay(1); + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping
Hi Christian, I found the reason why page tables are not freed when unmapping. All the pts are reserved, then they are not freed until vm fini. So the consequences are old pts and new pts for the same VAs will exist till vm fini. In KFD big buffer strees test, multiple times of mapping and unmapping a big range of system memory causes huge vram pts usage accumulation. I tried to avoid generating duplicated pts during unmapping in amdgpu_vm_update_ptes() by skipping amdgpu_vm_free_pts() and not reserving the lowest pts, but they didn't work with VM fault. The only way working is skipping whole function amdgpu_vm_update_ptes(), but it seems wrong, because we have to update GPU VM MMU. So there is no bug in amdgpu_vm_update_ptes(), but the accumulation of pts vram usage is an overhead. Do you think what we can do to get better solution? Regards, Eric On 2019-10-31 10:33 a.m., Huang, JinHuiEric wrote: > The hardware is vega10 and test is KFDMemoryTest.BigBufferStressTest. > More detail is on Jira SWDEV-201443. > > Regards, > > Eric > > On 2019-10-31 10:08 a.m., StDenis, Tom wrote: >> I could try it on my carrizo/polaris setup. Is there a test procedure I >> could folllow to trigger the changed code paths? >> >> >> Tom >> >> On 2019-10-31 6:41 a.m., Koenig, Christian wrote: >>> Just tested this and amdgpu_vm_update_ptes() indeed works as expected. >>> >>> When you free at least a 2MB the lowest level of page tables is freed >>> up again. >>> >>> BTW: What hardware have you tested this on? On gfx8 and older it is >>> expected that page tables are never freed. >>> >>> Regards, >>> Christian. >>> >>> Am 30.10.19 um 19:11 schrieb Christian König: >>>> Then I don't see how this patch actually changes anything. >>>> >>>> Could only be a bug in amdgpu_vm_update_ptes(). Going to investigate >>>> this, but I won't have time to look into the ticket in detail. >>>> >>>> Regards, >>>> Christian. 
>>>> >>>> Am 30.10.19 um 19:00 schrieb Huang, JinHuiEric: >>>>> Actually I do prevent to remove in-use pts by this: >>>>> >>>>> + r = amdgpu_vm_remove_ptes(adev, vm, >>>>> + (mapping->start + 0x1ff) & (~0x1ffll), >>>>> + (mapping->last + 1) & (~0x1ffll)); >>>>> >>>>> Which is only removing aligned page table for 2M. And I have tested >>>>> it at least on KFD tests without anything broken. >>>>> >>>>> By the way, I am not familiar with memory staff. This patch is the >>>>> best I can do for now. Could you take a look at the Jira ticket >>>>> SWDEV-201443 ? and find the better solution. Thanks! >>>>> >>>>> Regards, >>>>> >>>>> Eric >>>>> >>>>> On 2019-10-30 1:57 p.m., Christian König wrote: >>>>>> One thing I've forgotten: >>>>>> >>>>>> What you could maybe do to improve the situation is to join >>>>>> adjacent ranges in amdgpu_vm_clear_freed(), but I'm not sure how >>>>>> the chances are that the ranges are freed all together. >>>>>> >>>>>> The only other alternative I can see would be to check the mappings >>>>>> of a range in amdgpu_update_ptes() and see if you could walk the >>>>>> tree up if the valid flag is not set and there are no mappings left >>>>>> for a page table. >>>>>> >>>>>> Regards, >>>>>> Christian. >>>>>> >>>>>> Am 30.10.19 um 18:42 schrieb Koenig, Christian: >>>>>>>> The vaild flag doesn't take effect in this function. >>>>>>> That's irrelevant. >>>>>>> >>>>>>> See what amdgpu_vm_update_ptes() does is to first determine the >>>>>>> fragment size: >>>>>>>> amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); >>>>>>> Then we walk down the tree: >>>>>>>> amdgpu_vm_pt_start(adev, params->vm, start, &cursor); >>>>>>>> while (cursor.pfn < end) { >>>>>>> And make sure that the page tables covering the address range are >>>>>>> actually allocated: >>>>>>>>
Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping
The hardware is vega10 and test is KFDMemoryTest.BigBufferStressTest. More detail is on Jira SWDEV-201443. Regards, Eric On 2019-10-31 10:08 a.m., StDenis, Tom wrote: > I could try it on my carrizo/polaris setup. Is there a test procedure I > could follow to trigger the changed code paths? > > > Tom > > On 2019-10-31 6:41 a.m., Koenig, Christian wrote: >> Just tested this and amdgpu_vm_update_ptes() indeed works as expected. >> >> When you free at least a 2MB the lowest level of page tables is freed >> up again. >> >> BTW: What hardware have you tested this on? On gfx8 and older it is >> expected that page tables are never freed. >> >> Regards, >> Christian. >> >> Am 30.10.19 um 19:11 schrieb Christian König: >>> Then I don't see how this patch actually changes anything. >>> >>> Could only be a bug in amdgpu_vm_update_ptes(). Going to investigate >>> this, but I won't have time to look into the ticket in detail. >>> >>> Regards, >>> Christian. >>> >>> Am 30.10.19 um 19:00 schrieb Huang, JinHuiEric: >>>> Actually I do prevent to remove in-use pts by this: >>>> >>>> + r = amdgpu_vm_remove_ptes(adev, vm, >>>> + (mapping->start + 0x1ff) & (~0x1ffll), >>>> + (mapping->last + 1) & (~0x1ffll)); >>>> >>>> Which is only removing aligned page table for 2M. And I have tested >>>> it at least on KFD tests without anything broken. >>>> >>>> By the way, I am not familiar with memory stuff. This patch is the >>>> best I can do for now. Could you take a look at the Jira ticket >>>> SWDEV-201443 ? and find the better solution. Thanks! >>>> >>>> Regards, >>>> >>>> Eric >>>> >>>> On 2019-10-30 1:57 p.m., Christian König wrote: >>>>> One thing I've forgotten: >>>>> >>>>> What you could maybe do to improve the situation is to join >>>>> adjacent ranges in amdgpu_vm_clear_freed(), but I'm not sure how >>>>> the chances are that the ranges are freed all together. 
>>>>> >>>>> The only other alternative I can see would be to check the mappings >>>>> of a range in amdgpu_update_ptes() and see if you could walk the >>>>> tree up if the valid flag is not set and there are no mappings left >>>>> for a page table. >>>>> >>>>> Regards, >>>>> Christian. >>>>> >>>>> Am 30.10.19 um 18:42 schrieb Koenig, Christian: >>>>>>> The vaild flag doesn't take effect in this function. >>>>>> That's irrelevant. >>>>>> >>>>>> See what amdgpu_vm_update_ptes() does is to first determine the >>>>>> fragment size: >>>>>>> amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); >>>>>> Then we walk down the tree: >>>>>>> amdgpu_vm_pt_start(adev, params->vm, start, &cursor); >>>>>>> while (cursor.pfn < end) { >>>>>> And make sure that the page tables covering the address range are >>>>>> actually allocated: >>>>>>> r = amdgpu_vm_alloc_pts(params->adev, params->vm, >>>>>>> &cursor); >>>>>> Then we update the tables with the flags and addresses and free up >>>>>> subsequent tables in the case of huge pages or freed up areas: >>>>>>> /* Free all child entries */ >>>>>>> while (cursor.pfn < frag_start) { >>>>>>> amdgpu_vm_free_pts(adev, params->vm, &cursor); >>>>>>> amdgpu_vm_pt_next(adev, &cursor); >>>>>>> } >>>>>> This is the maximum you can free, cause all other page tables are >>>>>> not completely covered by the range and so potentially still in use. >>>>>> >>>>>> And I have the strong suspicion that this is what your patch is >>>>>> actually doing wrong. In other words you are also freeing page >>>>>> tables which are only partially covered by the range and so >>>>>> potentially still in use. >>>>>> >>>>>> Since we don'
Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping
Actually I do prevent to remove in-use pts by this: + r = amdgpu_vm_remove_ptes(adev, vm, + (mapping->start + 0x1ff) & (~0x1ffll), + (mapping->last + 1) & (~0x1ffll)); Which is only removing aligned page table for 2M. And I have tested it at least on KFD tests without anything broken. By the way, I am not familiar with memory stuff. This patch is the best I can do for now. Could you take a look at the Jira ticket SWDEV-201443 ? and find the better solution. Thanks! Regards, Eric On 2019-10-30 1:57 p.m., Christian König wrote: One thing I've forgotten: What you could maybe do to improve the situation is to join adjacent ranges in amdgpu_vm_clear_freed(), but I'm not sure how the chances are that the ranges are freed all together. The only other alternative I can see would be to check the mappings of a range in amdgpu_update_ptes() and see if you could walk the tree up if the valid flag is not set and there are no mappings left for a page table. Regards, Christian. Am 30.10.19 um 18:42 schrieb Koenig, Christian: The valid flag doesn't take effect in this function. That's irrelevant. See what amdgpu_vm_update_ptes() does is to first determine the fragment size: amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); Then we walk down the tree: amdgpu_vm_pt_start(adev, params->vm, start, &cursor); while (cursor.pfn < end) { And make sure that the page tables covering the address range are actually allocated: r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); Then we update the tables with the flags and addresses and free up subsequent tables in the case of huge pages or freed up areas: /* Free all child entries */ while (cursor.pfn < frag_start) { amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); } This is the maximum you can free, cause all other page tables are not completely covered by the range and so potentially still in use. And I have the strong suspicion that this is what your patch is actually doing wrong. 
In other words you are also freeing page tables which are only partially covered by the range and so potentially still in use. Since we don't have any tracking how many entries in a page table are currently valid and how many are invalid we actually can't implement what you are trying to do here. So the patch is definitely somehow broken. Regards, Christian. Am 30.10.19 um 17:55 schrieb Huang, JinHuiEric: The valid flag doesn't take effect in this function. amdgpu_vm_alloc_pts() is always executed that only depended on "cursor.pfn < end". The valid flag has only been checked on here for asic below GMC v9: if (adev->asic_type < CHIP_VEGA10 && (flags & AMDGPU_PTE_VALID))... Regards, Eric On 2019-10-30 12:30 p.m., Koenig, Christian wrote: Am 30.10.2019 17:19 schrieb "Huang, JinHuiEric" <mailto:jinhuieric.hu...@amd.com>: I tested it that it saves a lot of vram on KFD big buffer stress test. I think there are two reasons: 1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary pts, because there is no flag to determine if the VA is mapping or unmapping in function amdgpu_vm_update_ptes(). It saves the most of memory. That's not correct. The valid flag is used for this. 2. Intentionally removing those unmapping pts is logical expectation, although it is not removing so much pts. Well I actually don't see a change to what update_ptes is doing and have the strong suspicion that the patch is simply broken. You either free page tables which are potentially still in use or update_pte doesn't free page tables when the valid bit is not set. Regards, Christian. 
Regards, Eric On 2019-10-30 11:57 a.m., Koenig, Christian wrote: Am 30.10.2019 16:47 schrieb "Kuehling, Felix" <mailto:felix.kuehl...@amd.com>: On 2019-10-30 9:52 a.m., Christian König wrote: > Am 29.10.19 um 21:06 schrieb Huang, JinHuiEric: >> The issue is PT BOs are not freed when unmapping VA, >> which causes vram usage accumulated is huge in some >> memory stress test, such as kfd big buffer stress test. >> Function amdgpu_vm_bo_update_mapping() is called by both >> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The >> solution is replacing amdgpu_vm_bo_update_mapping() in >> amdgpu_vm_clear_freed() with removing PT BOs function >> to save vram usage. > > NAK, that is intentional behavior. > > Otherwise we can run into out of memory situations when page tables > need to be allocated again under stress. That's a bit a
Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping
The valid flag doesn't take effect in this function. amdgpu_vm_alloc_pts() is always executed that only depended on "cursor.pfn < end". The valid flag has only been checked on here for asic below GMC v9: if (adev->asic_type < CHIP_VEGA10 && (flags & AMDGPU_PTE_VALID))... Regards, Eric On 2019-10-30 12:30 p.m., Koenig, Christian wrote: Am 30.10.2019 17:19 schrieb "Huang, JinHuiEric" <mailto:jinhuieric.hu...@amd.com>: I tested it that it saves a lot of vram on KFD big buffer stress test. I think there are two reasons: 1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary pts, because there is no flag to determine if the VA is mapping or unmapping in function amdgpu_vm_update_ptes(). It saves the most of memory. That's not correct. The valid flag is used for this. 2. Intentionally removing those unmapping pts is logical expectation, although it is not removing so much pts. Well I actually don't see a change to what update_ptes is doing and have the strong suspicion that the patch is simply broken. You either free page tables which are potentially still in use or update_pte doesn't free page tables when the valid bit is not set. Regards, Christian. Regards, Eric On 2019-10-30 11:57 a.m., Koenig, Christian wrote: Am 30.10.2019 16:47 schrieb "Kuehling, Felix" <mailto:felix.kuehl...@amd.com>: On 2019-10-30 9:52 a.m., Christian König wrote: > Am 29.10.19 um 21:06 schrieb Huang, JinHuiEric: >> The issue is PT BOs are not freed when unmapping VA, >> which causes vram usage accumulated is huge in some >> memory stress test, such as kfd big buffer stress test. >> Function amdgpu_vm_bo_update_mapping() is called by both >> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The >> solution is replacing amdgpu_vm_bo_update_mapping() in >> amdgpu_vm_clear_freed() with removing PT BOs function >> to save vram usage. > > NAK, that is intentional behavior. 
> > Otherwise we can run into out of memory situations when page tables > need to be allocated again under stress. That's a bit arbitrary and inconsistent. We are freeing page tables in other situations, when a mapping uses huge pages in amdgpu_vm_update_ptes. Why not when a mapping is destroyed completely? I'm actually a bit surprised that the huge-page handling in amdgpu_vm_update_ptes isn't kicking in to free up lower-level page tables when a BO is unmapped. Well it does free the lower level, and that is already causing problems (that's why I added the reserved space). What we don't do is freeing the higher levels. E.g. when you free a 2MB BO we free the lowest level, if we free a 1GB BO we free the two lowest levels etc... The problem with freeing the higher levels is that you don't know who is also using this. E.g. we would need to check all entries when we unmap one. It's simply not worth it for a maximum saving of 2MB per VM. Writing this I'm actually wondering how you ended up in this issue? There shouldn't be much savings from this. Regards, Christian. Regards, Felix > > Regards, > Christian. > >> >> Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924 >> Signed-off-by: Eric Huang >> <mailto:jinhuieric.hu...@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 >> +- >> 1 file changed, 48 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 0f4c3b2..8a480c7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct >> amdgpu_device *adev, struct amdgpu_vm *vm) >> } >> /** >> + * amdgpu_vm_remove_ptes - free PT BOs >> + * >> + * @adev: amdgpu device structure >> + * @vm: amdgpu vm structure >> + * @start: start of mapped range >> + * @end: end of mapped entry >> + * >> + * Free the page table level. 
>> + */ >> +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev, >> +struct amdgpu_vm *vm, uint64_t start, uint64_t end) >> +{ >> +struct amdgpu_vm_pt_cursor cursor; >> +unsigned shift, num_entries; >> + >> +amdgpu_vm_pt_start(adev, vm, start, &cursor); >> +while (cursor.level < AMDGPU_VM_PTB) { >> +if (!amdgpu_vm_pt_descendant(adev, &cursor)) >> +return -ENOENT; >> +} >> + >> +while (cursor.pfn < end) { >> +amdgpu_vm_free_table(cursor.entry); >> +num_entries = amdgpu_vm_num_entries(adev, cursor
Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping
I tested it that it saves a lot of vram on KFD big buffer stress test. I think there are two reasons: 1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary pts, because there is no flag to determine if the VA is mapping or unmapping in function amdgpu_vm_update_ptes(). It saves the most of memory. 2. Intentionally removing those unmapping pts is logical expectation, although it is not removing so much pts. Regards, Eric On 2019-10-30 11:57 a.m., Koenig, Christian wrote: Am 30.10.2019 16:47 schrieb "Kuehling, Felix" <mailto:felix.kuehl...@amd.com>: On 2019-10-30 9:52 a.m., Christian König wrote: > Am 29.10.19 um 21:06 schrieb Huang, JinHuiEric: >> The issue is PT BOs are not freed when unmapping VA, >> which causes vram usage accumulated is huge in some >> memory stress test, such as kfd big buffer stress test. >> Function amdgpu_vm_bo_update_mapping() is called by both >> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The >> solution is replacing amdgpu_vm_bo_update_mapping() in >> amdgpu_vm_clear_freed() with removing PT BOs function >> to save vram usage. > > NAK, that is intentional behavior. > > Otherwise we can run into out of memory situations when page tables > need to be allocated again under stress. That's a bit arbitrary and inconsistent. We are freeing page tables in other situations, when a mapping uses huge pages in amdgpu_vm_update_ptes. Why not when a mapping is destroyed completely? I'm actually a bit surprised that the huge-page handling in amdgpu_vm_update_ptes isn't kicking in to free up lower-level page tables when a BO is unmapped. Well it does free the lower level, and that is already causing problems (that's why I added the reserved space). What we don't do is freeing the higher levels. E.g. when you free a 2MB BO we free the lowest level, if we free a 1GB BO we free the two lowest levels etc... The problem with freeing the higher levels is that you don't know who is also using this. E.g. 
we would need to check all entries when we unmap one. It's simply not worth it for a maximum saving of 2MB per VM. Writing this I'm actually wondering how you ended up in this issue? There shouldn't be much savings from this. Regards, Christian. Regards, Felix > > Regards, > Christian. > >> >> Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924 >> Signed-off-by: Eric Huang >> <mailto:jinhuieric.hu...@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 >> +- >> 1 file changed, 48 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 0f4c3b2..8a480c7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct >> amdgpu_device *adev, struct amdgpu_vm *vm) >> } >> /** >> + * amdgpu_vm_remove_ptes - free PT BOs >> + * >> + * @adev: amdgpu device structure >> + * @vm: amdgpu vm structure >> + * @start: start of mapped range >> + * @end: end of mapped entry >> + * >> + * Free the page table level. 
>> + */ >> +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev, >> +struct amdgpu_vm *vm, uint64_t start, uint64_t end) >> +{ >> +struct amdgpu_vm_pt_cursor cursor; >> +unsigned shift, num_entries; >> + >> +amdgpu_vm_pt_start(adev, vm, start, &cursor); >> +while (cursor.level < AMDGPU_VM_PTB) { >> +if (!amdgpu_vm_pt_descendant(adev, &cursor)) >> +return -ENOENT; >> +} >> + >> +while (cursor.pfn < end) { >> +amdgpu_vm_free_table(cursor.entry); >> +num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1); >> + >> +if (cursor.entry != &cursor.parent->entries[num_entries - 1]) { >> +/* Next ptb entry */ >> +shift = amdgpu_vm_level_shift(adev, cursor.level - 1); >> +cursor.pfn += 1ULL << shift; >> +cursor.pfn &= ~((1ULL << shift) - 1); >> +cursor.entry++; >> +} else { >> +/* Next ptb entry in next pd0 entry */ >> +amdgpu_vm_pt_ancestor(&cursor); >> +shift = amdgpu_vm_level_shift(adev, cursor.level - 1); >> +cursor.pfn += 1ULL << shift; >> +cursor.pfn &= ~((1ULL << shift) - 1); >> +amdgpu_vm_pt_descendant(adev, &cursor); >> +} >> +
[PATCH] drm/amdgpu: remove PT BOs when unmapping
The issue is PT BOs are not freed when unmapping VA, which causes vram usage accumulated is huge in some memory stress test, such as kfd big buffer stress test. Function amdgpu_vm_bo_update_mapping() is called by both amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The solution is replacing amdgpu_vm_bo_update_mapping() in amdgpu_vm_clear_freed() with removing PT BOs function to save vram usage. Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924 Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 +- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0f4c3b2..8a480c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** + * amdgpu_vm_remove_ptes - free PT BOs + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * @start: start of mapped range + * @end: end of mapped entry + * + * Free the page table level. 
+ */ +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, uint64_t end) +{ + struct amdgpu_vm_pt_cursor cursor; + unsigned shift, num_entries; + + amdgpu_vm_pt_start(adev, vm, start, &cursor); + while (cursor.level < AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + } + + while (cursor.pfn < end) { + amdgpu_vm_free_table(cursor.entry); + num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1); + + if (cursor.entry != &cursor.parent->entries[num_entries - 1]) { + /* Next ptb entry */ + shift = amdgpu_vm_level_shift(adev, cursor.level - 1); + cursor.pfn += 1ULL << shift; + cursor.pfn &= ~((1ULL << shift) - 1); + cursor.entry++; + } else { + /* Next ptb entry in next pd0 entry */ + amdgpu_vm_pt_ancestor(&cursor); + shift = amdgpu_vm_level_shift(adev, cursor.level - 1); + cursor.pfn += 1ULL << shift; + cursor.pfn &= ~((1ULL << shift) - 1); + amdgpu_vm_pt_descendant(adev, &cursor); + } + } + + return 0; +} + +/** * amdgpu_vm_clear_freed - clear freed BOs in the PT * * @adev: amdgpu_device pointer @@ -1949,7 +1994,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; - uint64_t init_pte_value = 0; struct dma_fence *f = NULL; int r; @@ -1958,13 +2002,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = AMDGPU_PTE_DEFAULT_ATC; + r = amdgpu_vm_remove_ptes(adev, vm, + (mapping->start + 0x1ff) & (~0x1ffll), + (mapping->last + 1) & (~0x1ffll)); - r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL, - mapping->start, mapping->last, - init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -1980,7 +2021,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } return 0; - } /** -- 2.7.4 ___ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: fix gfx ib test failed in sriov
It partially reverts the regression of commit e4a67e6cf14c258619f ("drm/amdgpu/psp: move TMR to cpu invisible vram region") which causes gfx ib test failed when driver loading in sriov system. Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9f7cc5b..9f91ced 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -261,9 +261,14 @@ static int psp_tmr_init(struct psp_context *psp) } } - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, NULL); + if (amdgpu_sriov_vf(psp->adev)) + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + else + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, NULL); return ret; } @@ -1216,7 +1221,10 @@ static int psp_hw_fini(void *handle) psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, NULL); + if (amdgpu_sriov_vf(adev)) + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + else + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, NULL); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index bc0947f..b73d4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -171,6 +171,7 @@ struct psp_context /* tmr buffer */ struct amdgpu_bo*tmr_bo; uint64_ttmr_mc_addr; + void*tmr_buf; /* asd firmware and buffer */ const struct firmware *asd_fw; -- 2.7.4 ___ 
amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdkfd: fix cp hang in eviction
ping. On 2019-07-10 11:20 a.m., Huang, JinHuiEric wrote: > The cp hang occurs in OCL conformance test only on supermicro > platform which has 40 cores and the test generates 40 threads. > The root cause is race condition in non-protected flags. > > The fix is to add flags of is_evicted and is_active(init_mqd()) > into protected area. > > Signed-off-by: Eric Huang > --- > drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +--- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 9ffdda5..f23e17b 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct > device_queue_manager *dqm, struct queue *q, > > mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( > q->properties.type)]; > - /* > - * Eviction state logic: mark all queues as evicted, even ones > - * not currently active. Restoring inactive queues later only > - * updates the is_evicted flag but is a no-op otherwise. > - */ > - q->properties.is_evicted = !!qpd->evicted; > + > if (q->properties.type == KFD_QUEUE_TYPE_SDMA || > q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) > dqm->asic_ops.init_sdma_vm(dqm, q, qpd); > @@ -1173,9 +1168,16 @@ static int create_queue_cpsch(struct > device_queue_manager *dqm, struct queue *q, > retval = -ENOMEM; > goto out_deallocate_doorbell; > } > + > + dqm_lock(dqm); > + /* > + * Eviction state logic: mark all queues as evicted, even ones > + * not currently active. Restoring inactive queues later only > + * updates the is_evicted flag but is a no-op otherwise. 
> + */ > + q->properties.is_evicted = !!qpd->evicted; > mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, > &q->gart_mqd_addr, &q->properties); > - dqm_lock(dqm); > > list_add(&q->list, &qpd->queues_list); > qpd->queue_count++; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Recall: [PATCH] drm/amdkfd: fix cp hang in eviction
Huang, JinHuiEric would like to recall the message, "[PATCH] drm/amdkfd: fix cp hang in eviction". ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: fix cp hang in eviction
The cp hang occurs in OCL conformance test only on supermicro platform which has 40 cores and the test generates 40 threads. The root cause is race condition in non-protected flags. The fix is to add flags of is_evicted and is_active(init_mqd()) into protected area. Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9ffdda5..f23e17b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - /* -* Eviction state logic: mark all queues as evicted, even ones -* not currently active. Restoring inactive queues later only -* updates the is_evicted flag but is a no-op otherwise. -*/ - q->properties.is_evicted = !!qpd->evicted; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) dqm->asic_ops.init_sdma_vm(dqm, q, qpd); @@ -1173,9 +1168,16 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + + dqm_lock(dqm); + /* +* Eviction state logic: mark all queues as evicted, even ones +* not currently active. Restoring inactive queues later only +* updates the is_evicted flag but is a no-op otherwise. +*/ + q->properties.is_evicted = !!qpd->evicted; mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - dqm_lock(dqm); list_add(&q->list, &qpd->queues_list); qpd->queue_count++; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: fix cp hang in eviction
The cp hang occurs in OCL conformance test only on supermicro platform which has 40 cores and the test generates 40 threads. The root cause is race condition in non-protected flags. The fix is to add flags of is_evicted and is_active(init_mqd()) into protected area. Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9ffdda5..535c981 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - /* -* Eviction state logic: mark all queues as evicted, even ones -* not currently active. Restoring inactive queues later only -* updates the is_evicted flag but is a no-op otherwise. -*/ - q->properties.is_evicted = !!qpd->evicted; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) dqm->asic_ops.init_sdma_vm(dqm, q, qpd); @@ -1173,9 +1168,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + + dqm_lock(dqm); + /* +* Eviction state logic: mark all queues as evicted, even ones +* not currently active. Restoring inactive queues later only +* updates the is_evicted flag but is a no-op otherwise. +*/ + q->properties.is_evicted = !!qpd->evicted; + q->properties.is_suspended = false; mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - dqm_lock(dqm); list_add(&q->list, &qpd->queues_list); qpd->queue_count++; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed
Oops. I submitted it moments ago. Thanks for your catch anyway. Eric On 2019-03-06 11:34 a.m., William Lewis wrote: Typo in patch description: s/disalbed/disabled/ On 3/6/19 10:11 AM, Deucher, Alexander wrote: Add a line like: Fixes: 9032ea09e2d2ef ("drm/amdkfd: add RAS ECC event support") With that added: Reviewed-by: Alex Deucher <mailto:alexander.deuc...@amd.com> From: amd-gfx <mailto:amd-gfx-boun...@lists.freedesktop.org> on behalf of Huang, JinHuiEric <mailto:jinhuieric.hu...@amd.com> Sent: Wednesday, March 6, 2019 10:59 AM To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> Cc: Huang, JinHuiEric Subject: [PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed It fixes a commpiling error on commit 9032ea09e2d2ef0d10e5cd793713bf2eb21643c5 drm/amdkfd: add RAS ECC event support Change-Id: I8792767726e28eed3a3fcf9072f608701be13c79 Signed-off-by: Eric Huang <mailto:jinhuieric.hu...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fe1d736..acf8ae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -640,4 +640,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { } + +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} #endif -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> https://lists.freedesktop.org/mailman/listinfo/amd-gfx<https://nam02.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7C%7C2cf93d5576b848c6d66708d6a24e5eb2%7C84df9e7fe9f640afb435%7C1%7C0%7C636874854807263322&sdata=AV62VQT1o%2F6gVK1eHdzGT4vmCEJveX6C26npgyRgZtY%3D&reserved=0> ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
https://nam02.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7C%7C2cf93d5576b848c6d66708d6a24e5eb2%7C84df9e7fe9f640afb435%7C1%7C0%7C636874854807293349&sdata=7nutYNQv0XODz%2BUzQTTbIrahZI%2B6%2BG9cETOEkBnUAU4%3D&reserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed
It fixes a compiling error on commit 9032ea09e2d2ef0d10e5cd793713bf2eb21643c5 drm/amdkfd: add RAS ECC event support Change-Id: I8792767726e28eed3a3fcf9072f608701be13c79 Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fe1d736..acf8ae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -640,4 +640,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { } + +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} #endif -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/powerplay: print current clock level when dpm is disabled on vg20
Typo "difabled" -> "disabled" in description. With that fix it is Reviewed-by: Eric Huang On 2019-02-19 4:09 p.m., Liu, Shaoyun wrote: > When DPM for the specific clock is difabled, driver should still print out > current clock info for rocm-smi support on vega20 > > Change-Id: I8669c77bf153caa2cd63a575802eb58747151239 > Signed-off-by: shaoyunl > --- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 56 > +++--- > 1 file changed, 28 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > index aad79aff..c95e0f3 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > @@ -2641,9 +2641,8 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.gfx_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled, > - "[GetSclks]: gfxclk dpm not enabled!\n", > - return -EPERM); > + if (!data->smu_features[GNLD_DPM_GFXCLK].enabled) > + return -1; > > count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > @@ -2670,9 +2669,8 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.mem_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled, > - "[GetMclks]: uclk dpm not enabled!\n", > - return -EPERM); > + if (!data->smu_features[GNLD_DPM_UCLK].enabled) > + return -1; > > count = (dpm_table->count > MAX_NUM_CLOCKS) ? 
MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = data->mclk_latency_table.count = count; > @@ -2696,9 +2694,8 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.dcef_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled, > - "[GetDcfclocks]: dcefclk dpm not enabled!\n", > - return -EPERM); > + if (!data->smu_features[GNLD_DPM_DCEFCLK].enabled) > + return -1; > > count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > @@ -2719,9 +2716,8 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.soc_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled, > - "[GetSocclks]: socclk dpm not enabled!\n", > - return -EPERM); > + if (!data->smu_features[GNLD_DPM_SOCCLK].enabled) > + return -1; > > count = (dpm_table->count > MAX_NUM_CLOCKS) ? 
MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > @@ -3137,10 +3133,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr > *hwmgr, > "Attempt to get current gfx clk Failed!", > return ret); > > - ret = vega20_get_sclks(hwmgr, &clocks); > - PP_ASSERT_WITH_CODE(!ret, > - "Attempt to get gfx clk levels Failed!", > - return ret); > + if (vega20_get_sclks(hwmgr, &clocks)) { > + size += sprintf(buf + size, "0: %uMhz * (DPM > disabled)\n", > + now / 100); > + break; > + } > > for (i = 0; i < clocks.num_levels; i++) > size += sprintf(buf + size, "%d: %uMhz %s\n", > @@ -3154,10 +3151,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr > *hwmgr, > "Attempt to get current mclk freq Failed!", > return ret); > > - ret = vega20_get_memclocks(hwmgr, &clocks); > - PP_ASSERT_WITH_CODE(!ret, > - "Attempt to get memory clk levels Failed!", > - return ret); > + if (vega20_get_memclocks(hwmgr, &clocks)) { > + size += sprintf(buf + size, "0: %uMhz * (DPM > disabled)\n", > + now / 100); > + break; > + } > > for (i = 0; i < clocks.num_levels; i++) > size += sprintf(buf + size, "%d: %uMhz %s\n", > @@ -3171,10 +3169,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr > *hwmgr, > "Attempt to get current socclk freq Failed!", > return ret); > > - ret = vega20_get_socclocks(hwmgr, &clocks); > - PP_ASS
Re: [PATCH] drm/powerplay: Get fix clock info when dpm is disabled for the clock
"rocm-smi -s" doesn't parse output from driver and it directly display output, so it will not break rocm-smi. Regards, Eric On 2019-02-19 2:51 p.m., Kuehling, Felix wrote: > [+Kent] > > On 2019-02-19 12:08 p.m., Huang, JinHuiEric wrote: > >> It seems redundant to print out all DPM levels when DPM is disabled. >> Probably it looks meaningful to print out "Mhz (DPM >> disabled)". > If you change the format, you risk breaking tools such as rocm-smi. Is > there some other way for rocm-smi to tell that DPM is disabled? > > Regards, > Felix > > >> Regards, >> >> Eric >> >> On 2019-02-15 4:25 p.m., Liu, Shaoyun wrote: >>> When DPM for the specific clock is difabled, driver should still able to >>> get fix clock info from the pptable >>> >>> Change-Id: Ic609203b3b87aa75b0cfd57b57717b3bb89daf48 >>> Signed-off-by: shaoyunl >>> --- >>> drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 >>> 1 file changed, 16 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c >>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c >>> index aad79aff..2eae0b4 100644 >>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c >>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c >>> @@ -2641,10 +2641,6 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr, >>> struct vega20_single_dpm_table *dpm_table = >>> &(data->dpm_table.gfx_table); >>> int i, count; >>> >>> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled, >>> - "[GetSclks]: gfxclk dpm not enabled!\n", >>> - return -EPERM); >>> - >>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? 
MAX_NUM_CLOCKS : >>> dpm_table->count; >>> clocks->num_levels = count; >>> >>> @@ -2670,10 +2666,6 @@ static int vega20_get_memclocks(struct pp_hwmgr >>> *hwmgr, >>> struct vega20_single_dpm_table *dpm_table = >>> &(data->dpm_table.mem_table); >>> int i, count; >>> >>> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled, >>> - "[GetMclks]: uclk dpm not enabled!\n", >>> - return -EPERM); >>> - >>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : >>> dpm_table->count; >>> clocks->num_levels = data->mclk_latency_table.count = count; >>> >>> @@ -2696,10 +2688,6 @@ static int vega20_get_dcefclocks(struct pp_hwmgr >>> *hwmgr, >>> struct vega20_single_dpm_table *dpm_table = >>> &(data->dpm_table.dcef_table); >>> int i, count; >>> >>> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled, >>> - "[GetDcfclocks]: dcefclk dpm not enabled!\n", >>> - return -EPERM); >>> - >>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : >>> dpm_table->count; >>> clocks->num_levels = count; >>> >>> @@ -2719,10 +2707,6 @@ static int vega20_get_socclocks(struct pp_hwmgr >>> *hwmgr, >>> struct vega20_single_dpm_table *dpm_table = >>> &(data->dpm_table.soc_table); >>> int i, count; >>> >>> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled, >>> - "[GetSocclks]: socclk dpm not enabled!\n", >>> - return -EPERM); >>> - >>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : >>> dpm_table->count; >>> clocks->num_levels = count; >>> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/powerplay: Get fix clock info when dpm is disabled for the clock
It seems redundant to print out all DPM levels when DPM is disabled. Probably it looks meaningful to print out "Mhz (DPM disabled)". Regards, Eric On 2019-02-15 4:25 p.m., Liu, Shaoyun wrote: > When DPM for the specific clock is difabled, driver should still able to > get fix clock info from the pptable > > Change-Id: Ic609203b3b87aa75b0cfd57b57717b3bb89daf48 > Signed-off-by: shaoyunl > --- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 > 1 file changed, 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > index aad79aff..2eae0b4 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > @@ -2641,10 +2641,6 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.gfx_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled, > - "[GetSclks]: gfxclk dpm not enabled!\n", > - return -EPERM); > - > count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > > @@ -2670,10 +2666,6 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.mem_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled, > - "[GetMclks]: uclk dpm not enabled!\n", > - return -EPERM); > - > count = (dpm_table->count > MAX_NUM_CLOCKS) ? 
MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = data->mclk_latency_table.count = count; > > @@ -2696,10 +2688,6 @@ static int vega20_get_dcefclocks(struct pp_hwmgr > *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.dcef_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled, > - "[GetDcfclocks]: dcefclk dpm not enabled!\n", > - return -EPERM); > - > count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > > @@ -2719,10 +2707,6 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr, > struct vega20_single_dpm_table *dpm_table = > &(data->dpm_table.soc_table); > int i, count; > > - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled, > - "[GetSocclks]: socclk dpm not enabled!\n", > - return -EPERM); > - > count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : > dpm_table->count; > clocks->num_levels = count; > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 2/2] drm/amd/powerplay: add override pcie parameters for Vega20 (v2)
Reviewed-by: Eric Huang On 2019-02-05 4:37 p.m., Kasiviswanathan, Harish wrote: > v2: Fix SMU message format > Send override message after SMU enable features > > Change-Id: Ib880c83bc7aa12be370cf6619acfe66e12664c9c > Signed-off-by: Harish Kasiviswanathan > --- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 45 > +- > 1 file changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > index da022ca..e1b1656 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > @@ -771,40 +771,49 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) > return 0; > } > > +/* > + * Override PCIe link speed and link width for DPM Level 1. PPTable entries > + * reflect the ASIC capabilities and not the system capabilities. For e.g. > + * Vega20 board in a PCI Gen3 system. In this case, when SMU's tries to > switch > + * to DPM1, it fails as system doesn't support Gen4. 
> + */ > static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) > { > struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); > - uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg; > + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; > int ret; > > if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) > - pcie_speed = 16; > + pcie_gen = 3; > else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) > - pcie_speed = 8; > + pcie_gen = 2; > else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) > - pcie_speed = 5; > + pcie_gen = 1; > else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) > - pcie_speed = 2; > + pcie_gen = 0; > > if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) > - pcie_width = 32; > + pcie_width = 7; > else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) > - pcie_width = 16; > + pcie_width = 6; > else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) > - pcie_width = 12; > + pcie_width = 5; > else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) > - pcie_width = 8; > - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) > pcie_width = 4; > + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) > + pcie_width = 3; > else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) > pcie_width = 2; > else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) > pcie_width = 1; > > - pcie_arg = pcie_width | (pcie_speed << 8); > - > + /* Bit 31:16: LCLK DPM level. 
0 is DPM0, and 1 is DPM1 > + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 > + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 > + */ > + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; > ret = smum_send_msg_to_smc_with_parameter(hwmgr, > - PPSMC_MSG_OverridePcieParameters, pcie_arg); > + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg); > PP_ASSERT_WITH_CODE(!ret, > "[OverridePcieParameters] Attempt to override pcie params > failed!", > return ret); > @@ -1611,11 +1620,6 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr > *hwmgr) > "[EnableDPMTasks] Failed to initialize SMC table!", > return result); > > - result = vega20_override_pcie_parameters(hwmgr); > - PP_ASSERT_WITH_CODE(!result, > - "[EnableDPMTasks] Failed to override pcie parameters!", > - return result); > - > result = vega20_run_btc(hwmgr); > PP_ASSERT_WITH_CODE(!result, > "[EnableDPMTasks] Failed to run btc!", > @@ -1631,6 +1635,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr > *hwmgr) > "[EnableDPMTasks] Failed to enable all smu features!", > return result); > > + result = vega20_override_pcie_parameters(hwmgr); > + PP_ASSERT_WITH_CODE(!result, > + "[EnableDPMTasks] Failed to override pcie parameters!", > + return result); > + > result = vega20_notify_smc_display_change(hwmgr); > PP_ASSERT_WITH_CODE(!result, > "[EnableDPMTasks] Failed to notify smc display change!", ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/powerplay: add override pcie parameters for Vega20
It is to solve RDMA performance issue. Change-Id: I441d2943e504e2ef7d33de0e8773a0f9b8fdb2ca Signed-off-by: Eric Huang --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 46 ++ 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 5085b36..7e59bc8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -771,6 +771,47 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg; + int ret; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_speed = 16; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_speed = 8; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_speed = 5; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_speed = 2; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) + pcie_width = 32; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 16; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 12; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 8; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + pcie_arg = pcie_width | (pcie_speed << 8); + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, pcie_arg); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + + 
return 0; +} + static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = @@ -1570,6 +1611,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to initialize SMC table!", return result); + result = vega20_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + result = vega20_run_btc(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to run btc!", -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx