Re: [PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF

2019-11-05 Thread Huang, JinHuiEric

On 2019-11-05 6:06 p.m., Alex Deucher wrote:
> On Tue, Nov 5, 2019 at 5:26 PM Huang, JinHuiEric
>  wrote:
>> Using the unified VBIOS causes a performance drop in the SR-IOV environment.
>> The fix is to switch to another register instead.
>>
>> Signed-off-by: Eric Huang 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 ++++++++++++++++---
>>   1 file changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 829d623..e44a3ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -3885,9 +3885,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>>  uint64_t clock;
>>
>>  mutex_lock(&adev->gfx.gpu_clock_mutex);
>> -   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>> -   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
>> -   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>> +   if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
>> +   uint32_t tmp, lsb, msb, i = 0;
>> +   do {
>> +   if (i != 0)
>> +   udelay(1);
>> +   tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
>> +   lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
>> +   msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
>> +   i++;
>> +   } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
>> +   clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
>> +   } else {
>> +   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>> +   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
>> +   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>> +   }
> Is there a reason we can't use the same regs on bare metal and SR-IOV?
>   I'd like to minimize the deltas if possible.

Per Jerry's request, this change avoids the P1 policy protection on
RLC_GPU_CLOCK_COUNT_LSB/MSB.

Eric

>
> Alex
>
>>  mutex_unlock(&adev->gfx.gpu_clock_mutex);
>>  return clock;
>>   }
>> --
>> 2.7.4
>>
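The MSB/LSB/MSB sequence in the SR-IOV branch above is the standard torn-read guard for a 64-bit counter exposed as two 32-bit registers: if the high word changes while the low word is being sampled, the pair is thrown away and re-read. A minimal, self-contained C sketch of the idea, with a simulated counter standing in for the RREG32_SOC15() register reads (the helper names are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Simulated 64-bit counter; the two read32 helpers stand in for the
 * RREG32_SOC15() register reads in the patch. */
static uint64_t fake_counter = 0x00000001fffffffeULL;

static uint32_t read32_lsb(void) { return (uint32_t)(fake_counter++); }
static uint32_t read32_msb(void) { return (uint32_t)(fake_counter >> 32); }

/* Sample a 64-bit counter split across two 32-bit registers without
 * tearing: re-read the high word and retry if it changed between the
 * two reads. Mirrors the loop in the patch, minus the udelay() backoff
 * and the adev->usec_timeout retry bound. */
static uint64_t read_counter64(void)
{
        uint32_t msb, lsb, check;

        do {
                check = read32_msb();
                lsb = read32_lsb();
                msb = read32_msb();
        } while (check != msb);

        return ((uint64_t)msb << 32) | lsb;
}

int main(void)
{
        printf("0x%llx\n", (unsigned long long)read_counter64());
        return 0;
}

Note the small difference between the two revisions below: v1 only delays between retries (udelay(1) when i != 0), while v2 delays on every iteration; both bound the loop with adev->usec_timeout so a misbehaving register cannot hang the caller.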

[PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF

2019-11-05 Thread Huang, JinHuiEric
Using the unified VBIOS causes a performance drop in the SR-IOV environment.
The fix is to switch to another register instead.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 829d623..e44a3ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3885,9 +3885,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
 
mutex_lock(&adev->gfx.gpu_clock_mutex);
-   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
-   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+   if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+   uint32_t tmp, lsb, msb, i = 0;
+   do {
+   if (i != 0)
+   udelay(1);
+   tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+   lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+   msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+   i++;
+   } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+   clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+   } else {
+   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+   }
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
 }
-- 
2.7.4


[PATCH] drm/amdgpu: change read of GPU clock counter on Vega10 VF

2019-11-05 Thread Huang, JinHuiEric
Using the unified VBIOS causes a performance drop in the SR-IOV environment.
The fix is to switch to another register instead.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 829d623..6770bd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3885,9 +3885,21 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
 
mutex_lock(&adev->gfx.gpu_clock_mutex);
-   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
-   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+   if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+   uint32_t tmp, lsb, msb, i = 0;
+   do {
+   tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+   lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+   msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+   i++;
+   udelay(1);
+   } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+   clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+   } else {
+   WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+   clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+   ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+   }
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
 }
-- 
2.7.4


Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-11-05 Thread Huang, JinHuiEric
Hi Christian,

I found the reason why page tables are not freed when unmapping. All the
PTs are reserved, so they are not freed until VM fini. The consequence is
that old PTs and new PTs for the same VAs both exist until VM fini. In the
KFD big buffer stress test, mapping and unmapping a big range of system
memory multiple times causes a huge accumulation of VRAM PT usage.

I tried to avoid generating duplicated PTs during unmapping in
amdgpu_vm_update_ptes() by skipping amdgpu_vm_free_pts() and by not
reserving the lowest PTs, but neither worked with VM faults. The only
approach that works is skipping the whole function amdgpu_vm_update_ptes(),
but that seems wrong, because we have to update the GPU VM MMU.

So there is no bug in amdgpu_vm_update_ptes(), but the accumulation of
PT VRAM usage is an overhead. What do you think we can do to get a better
solution?

Regards,

Eric

On 2019-10-31 10:33 a.m., Huang, JinHuiEric wrote:
> The hardware is vega10 and the test is KFDMemoryTest.BigBufferStressTest.
> More detail is on Jira SWDEV-201443.
>
> Regards,
>
> Eric
>
> On 2019-10-31 10:08 a.m., StDenis, Tom wrote:
>> I could try it on my carrizo/polaris setup.  Is there a test procedure I
>> could follow to trigger the changed code paths?
>>
>>
>> Tom
>>
>> On 2019-10-31 6:41 a.m., Koenig, Christian wrote:
>>> Just tested this and amdgpu_vm_update_ptes() indeed works as expected.
>>>
>>> When you free at least 2MB, the lowest level of page tables is freed
>>> up again.
>>>
>>> BTW: What hardware have you tested this on? On gfx8 and older it is
>>> expected that page tables are never freed.
>>>
>>> Regards,
>>> Christian.
>>>
>>> On 30.10.19 at 19:11, Christian König wrote:
>>>> Then I don't see how this patch actually changes anything.
>>>>
>>>> Could only be a bug in amdgpu_vm_update_ptes(). Going to investigate
>>>> this, but I won't have time to look into the ticket in detail.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> On 30.10.19 at 19:00, Huang, JinHuiEric wrote:
>>>>> Actually I do prevent removing in-use PTs with this:
>>>>>
>>>>> +   r = amdgpu_vm_remove_ptes(adev, vm,
>>>>> +   (mapping->start + 0x1ff) & (~0x1ffll),
>>>>> +   (mapping->last + 1) & (~0x1ffll));
>>>>>
>>>>> This only removes page tables aligned to 2M. And I have tested
>>>>> it, at least with the KFD tests, without anything breaking.
>>>>>
>>>>> By the way, I am not familiar with the memory stuff. This patch is the
>>>>> best I can do for now. Could you take a look at the Jira ticket
>>>>> SWDEV-201443 and find a better solution? Thanks!
>>>>>
>>>>> Regards,
>>>>>
>>>>> Eric
>>>>>
>>>>> On 2019-10-30 1:57 p.m., Christian König wrote:
>>>>>> One thing I've forgotten:
>>>>>>
>>>>>> What you could maybe do to improve the situation is to join
>>>>>> adjacent ranges in amdgpu_vm_clear_freed(), but I'm not sure how
>>>>>> the chances are that the ranges are freed all together.
>>>>>>
>>>>>> The only other alternative I can see would be to check the mappings
>>>>>> of a range in amdgpu_update_ptes() and see if you could walk the
>>>>>> tree up if the valid flag is not set and there are no mappings left
>>>>>> for a page table.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>> On 30.10.19 at 18:42, Koenig, Christian wrote:
>>>>>>> The valid flag doesn't take effect in this function.
>>>>>>> That's irrelevant.
>>>>>>>
>>>>>>> See what amdgpu_vm_update_ptes() does is to first determine the
>>>>>>> fragment size:
>>>>>>>> amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>>>>>>> Then we walk down the tree:
>>>>>>>>       amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>>       while (cursor.pfn < end) {
>>>>>>> And make sure that the page tables covering the address range are
>>>>>>> actually allocated:
>>>>>>>>      r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-10-31 Thread Huang, JinHuiEric
The hardware is vega10 and the test is KFDMemoryTest.BigBufferStressTest. 
More detail is on Jira SWDEV-201443.

Regards,

Eric

On 2019-10-31 10:08 a.m., StDenis, Tom wrote:
> I could try it on my carrizo/polaris setup.  Is there a test procedure I
> could follow to trigger the changed code paths?
>
>
> Tom
>
> On 2019-10-31 6:41 a.m., Koenig, Christian wrote:
>> Just tested this and amdgpu_vm_update_ptes() indeed works as expected.
>>
>> When you free at least 2MB, the lowest level of page tables is freed
>> up again.
>>
>> BTW: What hardware have you tested this on? On gfx8 and older it is
>> expected that page tables are never freed.
>>
>> Regards,
>> Christian.
>>
>> On 30.10.19 at 19:11, Christian König wrote:
>>> Then I don't see how this patch actually changes anything.
>>>
>>> Could only be a bug in amdgpu_vm_update_ptes(). Going to investigate
>>> this, but I won't have time to look into the ticket in detail.
>>>
>>> Regards,
>>> Christian.
>>>
>>> On 30.10.19 at 19:00, Huang, JinHuiEric wrote:
>>>> Actually I do prevent removing in-use PTs with this:
>>>>
>>>> +   r = amdgpu_vm_remove_ptes(adev, vm,
>>>> +   (mapping->start + 0x1ff) & (~0x1ffll),
>>>> +   (mapping->last + 1) & (~0x1ffll));
>>>>
>>>> This only removes page tables aligned to 2M. And I have tested
>>>> it, at least with the KFD tests, without anything breaking.
>>>>
>>>> By the way, I am not familiar with the memory stuff. This patch is the
>>>> best I can do for now. Could you take a look at the Jira ticket
>>>> SWDEV-201443 and find a better solution? Thanks!
>>>>
>>>> Regards,
>>>>
>>>> Eric
>>>>
>>>> On 2019-10-30 1:57 p.m., Christian König wrote:
>>>>> One thing I've forgotten:
>>>>>
>>>>> What you could maybe do to improve the situation is to join
>>>>> adjacent ranges in amdgpu_vm_clear_freed(), but I'm not sure how
>>>>> the chances are that the ranges are freed all together.
>>>>>
>>>>> The only other alternative I can see would be to check the mappings
>>>>> of a range in amdgpu_update_ptes() and see if you could walk the
>>>>> tree up if the valid flag is not set and there are no mappings left
>>>>> for a page table.
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> On 30.10.19 at 18:42, Koenig, Christian wrote:
>>>>>> The valid flag doesn't take effect in this function.
>>>>>> That's irrelevant.
>>>>>>
>>>>>> See what amdgpu_vm_update_ptes() does is to first determine the
>>>>>> fragment size:
>>>>>>> amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>>>>>> Then we walk down the tree:
>>>>>>>      amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>      while (cursor.pfn < end) {
>>>>>> And make sure that the page tables covering the address range are
>>>>>> actually allocated:
>>>>>>>      r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>> &cursor);
>>>>>> Then we update the tables with the flags and addresses and free up
>>>>>> subsequent tables in the case of huge pages or freed up areas:
>>>>>>>      /* Free all child entries */
>>>>>>>      while (cursor.pfn < frag_start) {
>>>>>>> amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>>>>>> amdgpu_vm_pt_next(adev, &cursor);
>>>>>>>      }
>>>>>> This is the maximum you can free, cause all other page tables are
>>>>>> not completely covered by the range and so potentially still in use.
>>>>>>
>>>>>> And I have the strong suspicion that this is what your patch is
>>>>>> actually doing wrong. In other words you are also freeing page
>>>>>> tables which are only partially covered by the range and so
>>>>>> potentially still in use.
>>>>>>
>>>>>> Since we don't have any tracking of how many entries in a page table
>>>>>> are currently valid and how many are invalid, we actually can't
>>>>>> implement what you are trying to do here.

Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-10-30 Thread Huang, JinHuiEric
Actually I do prevent removing in-use PTs with this:

+   r = amdgpu_vm_remove_ptes(adev, vm,
+   (mapping->start + 0x1ff) & (~0x1ffll),
+   (mapping->last + 1) & (~0x1ffll));

This only removes page tables aligned to 2M. And I have tested it, at least
with the KFD tests, without anything breaking.

By the way, I am not familiar with the memory stuff. This patch is the best I
can do for now. Could you take a look at the Jira ticket SWDEV-201443 and find
a better solution? Thanks!

Regards,

Eric

On 2019-10-30 1:57 p.m., Christian König wrote:
One thing I've forgotten:

What you could maybe do to improve the situation is to join adjacent ranges in 
amdgpu_vm_clear_freed(), but I'm not sure how the chances are that the ranges 
are freed all together.

The only other alternative I can see would be to check the mappings of a range 
in amdgpu_update_ptes() and see if you could walk the tree up if the valid flag 
is not set and there are no mappings left for a page table.

Regards,
Christian.

On 30.10.19 at 18:42, Koenig, Christian wrote:
The valid flag doesn't take effect in this function.
That's irrelevant.

See what amdgpu_vm_update_ptes() does is to first determine the fragment size:
amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

Then we walk down the tree:
amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
while (cursor.pfn < end) {

And make sure that the page tables covering the address range are actually 
allocated:
r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

Then we update the tables with the flags and addresses and free up subsequent 
tables in the case of huge pages or freed up areas:
/* Free all child entries */
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
}

This is the maximum you can free, cause all other page tables are not 
completely covered by the range and so potentially still in use.

And I have the strong suspicion that this is what your patch is actually doing 
wrong. In other words you are also freeing page tables which are only partially 
covered by the range and so potentially still in use.

Since we don't have any tracking of how many entries in a page table are
currently valid and how many are invalid, we actually can't implement what you
are trying to do here. So the patch is definitely somehow broken.

Regards,
Christian.

On 30.10.19 at 17:55, Huang, JinHuiEric wrote:

The valid flag doesn't take effect in this function. amdgpu_vm_alloc_pts() is
always executed, depending only on "cursor.pfn < end". The valid flag is only
checked here for ASICs below GMC v9:

if (adev->asic_type < CHIP_VEGA10 &&
(flags & AMDGPU_PTE_VALID))...

Regards,

Eric

On 2019-10-30 12:30 p.m., Koenig, Christian wrote:


On 30.10.2019 17:19, "Huang, JinHuiEric" <jinhuieric.hu...@amd.com> wrote:

I tested it, and it saves a lot of VRAM in the KFD big buffer stress test. I
think there are two reasons:

1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary
PTs, because there is no flag in amdgpu_vm_update_ptes() to determine whether
the VA is being mapped or unmapped. This saves most of the memory.

That's not correct. The valid flag is used for this.


2. Intentionally removing those unmapped PTs is the logically expected
behavior, although it does not remove that many PTs.

Well I actually don't see a change to what update_ptes is doing and have the 
strong suspicion that the patch is simply broken.

You either free page tables which are potentially still in use, or update_pte
doesn't free page tables when the valid bit is not set.

Regards,
Christian.




Regards,

Eric

On 2019-10-30 11:57 a.m., Koenig, Christian wrote:


On 30.10.2019 16:47, "Kuehling, Felix" <felix.kuehl...@amd.com> wrote:
On 2019-10-30 9:52 a.m., Christian König wrote:
> On 29.10.19 at 21:06, Huang, JinHuiEric wrote:
>> The issue is that PT BOs are not freed when unmapping a VA,
>> which causes huge accumulated vram usage in some
>> memory stress tests, such as the kfd big buffer stress test.
>> Function amdgpu_vm_bo_update_mapping() is called by both
>> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The
>> solution is to replace amdgpu_vm_bo_update_mapping() in
>> amdgpu_vm_clear_freed() with a function that removes PT BOs,
>> to save vram usage.
>
> NAK, that is intentional behavior.
>
> Otherwise we can run into out of memory situations when page tables
> need to be allocated again under stress.

That's a bit arbitrary and inconsistent.

Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-10-30 Thread Huang, JinHuiEric
The valid flag doesn't take effect in this function. amdgpu_vm_alloc_pts() is
always executed, depending only on "cursor.pfn < end". The valid flag is only
checked here for ASICs below GMC v9:

if (adev->asic_type < CHIP_VEGA10 &&
(flags & AMDGPU_PTE_VALID))...

Regards,

Eric

On 2019-10-30 12:30 p.m., Koenig, Christian wrote:


On 30.10.2019 17:19, "Huang, JinHuiEric" <jinhuieric.hu...@amd.com> wrote:

I tested it, and it saves a lot of VRAM in the KFD big buffer stress test. I
think there are two reasons:

1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary
PTs, because there is no flag in amdgpu_vm_update_ptes() to determine whether
the VA is being mapped or unmapped. This saves most of the memory.

That's not correct. The valid flag is used for this.


2. Intentionally removing those unmapped PTs is the logically expected
behavior, although it does not remove that many PTs.

Well I actually don't see a change to what update_ptes is doing and have the 
strong suspicion that the patch is simply broken.

You either free page tables which are potentially still in use, or update_pte
doesn't free page tables when the valid bit is not set.

Regards,
Christian.




Regards,

Eric

On 2019-10-30 11:57 a.m., Koenig, Christian wrote:


On 30.10.2019 16:47, "Kuehling, Felix" <felix.kuehl...@amd.com> wrote:
On 2019-10-30 9:52 a.m., Christian König wrote:
> On 29.10.19 at 21:06, Huang, JinHuiEric wrote:
>> The issue is that PT BOs are not freed when unmapping a VA,
>> which causes huge accumulated vram usage in some
>> memory stress tests, such as the kfd big buffer stress test.
>> Function amdgpu_vm_bo_update_mapping() is called by both
>> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The
>> solution is to replace amdgpu_vm_bo_update_mapping() in
>> amdgpu_vm_clear_freed() with a function that removes PT BOs,
>> to save vram usage.
>
> NAK, that is intentional behavior.
>
> Otherwise we can run into out of memory situations when page tables
> need to be allocated again under stress.

That's a bit arbitrary and inconsistent. We are freeing page tables in
other situations, when a mapping uses huge pages in
amdgpu_vm_update_ptes. Why not when a mapping is destroyed completely?

I'm actually a bit surprised that the huge-page handling in
amdgpu_vm_update_ptes isn't kicking in to free up lower-level page
tables when a BO is unmapped.

Well it does free the lower level, and that is already causing problems (that's 
why I added the reserved space).

What we don't do is freeing the higher levels.

E.g. when you free a 2MB BO we free the lowest level, if we free a 1GB BO we 
free the two lowest levels etc...

The problem with freeing the higher levels is that you don't know who is also 
using this. E.g. we would need to check all entries when we unmap one.

It's simply not worth it for a maximum saving of 2MB per VM.

Writing this I'm actually wondering how you ended up in this issue? There 
shouldn't be much savings from this.

Regards,
Christian.


Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924
>> Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 48 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0f4c3b2..8a480c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>   }
>> /**
>> + * amdgpu_vm_remove_ptes - free PT BOs
>> + *
>> + * @adev: amdgpu device structure
>> + * @vm: amdgpu vm structure
>> + * @start: start of mapped range
>> + * @end: end of mapped entry
>> + *
>> + * Free the page table level.
>> + */
>> +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev,
>> +struct amdgpu_vm *vm, uint64_t start, uint64_t end)
>> +{
>> +struct amdgpu_vm_pt_cursor cursor;
>> +unsigned shift, num_entries;
>> +
>> +amdgpu_vm_pt_start(adev, vm, start, &cursor);
>> +while (cursor.level < AMDGPU_VM_PTB) {
>> +if (!amdgpu_vm_pt_descendant(adev, &cursor))
>> +return -ENOENT;
>> +}
>> +
>> +while (cursor.pfn < end) {
>> +amdgpu_vm_free_table(cursor.entry);
>> +num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1);
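To put numbers on the level discussion above: assuming the GFX9 layout of 512 (2^9) entries per level with 4 KB base pages (an assumption of this sketch, not spelled out in the thread), each page-table level covers 512 times the span of the level below it, which is why unmapping a 2MB range can at most release one PTB and a 1GB range at most the two lowest levels. A standalone C illustration:

#include <stdio.h>

int main(void)
{
        /* Assumed GFX9 layout: 512 (2^9) entries per level, 4 KB pages. */
        const char *level[] = { "PTB", "PD0", "PD1", "PD2" };
        unsigned long long span = 4096ULL; /* one base page */

        for (int i = 0; i < 4; i++) {
                span *= 512ULL;
                /* Prints: PTB 2 MB, PD0 1024 MB, PD1 524288 MB, ... */
                printf("%s spans %llu MB\n", level[i], span >> 20);
        }
        return 0;
}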

Re: [PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-10-30 Thread Huang, JinHuiEric
I tested it, and it saves a lot of VRAM in the KFD big buffer stress test. I
think there are two reasons:

1. Calling amdgpu_vm_update_ptes() during unmapping will allocate unnecessary
PTs, because there is no flag in amdgpu_vm_update_ptes() to determine whether
the VA is being mapped or unmapped. This saves most of the memory.

2. Intentionally removing those unmapped PTs is the logically expected
behavior, although it does not remove that many PTs.

Regards,

Eric

On 2019-10-30 11:57 a.m., Koenig, Christian wrote:


On 30.10.2019 16:47, "Kuehling, Felix" <felix.kuehl...@amd.com> wrote:
On 2019-10-30 9:52 a.m., Christian König wrote:
> On 29.10.19 at 21:06, Huang, JinHuiEric wrote:
>> The issue is that PT BOs are not freed when unmapping a VA,
>> which causes huge accumulated vram usage in some
>> memory stress tests, such as the kfd big buffer stress test.
>> Function amdgpu_vm_bo_update_mapping() is called by both
>> amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The
>> solution is to replace amdgpu_vm_bo_update_mapping() in
>> amdgpu_vm_clear_freed() with a function that removes PT BOs,
>> to save vram usage.
>
> NAK, that is intentional behavior.
>
> Otherwise we can run into out of memory situations when page tables
> need to be allocated again under stress.

That's a bit arbitrary and inconsistent. We are freeing page tables in
other situations, when a mapping uses huge pages in
amdgpu_vm_update_ptes. Why not when a mapping is destroyed completely?

I'm actually a bit surprised that the huge-page handling in
amdgpu_vm_update_ptes isn't kicking in to free up lower-level page
tables when a BO is unmapped.

Well it does free the lower level, and that is already causing problems (that's 
why I added the reserved space).

What we don't do is freeing the higher levels.

E.g. when you free a 2MB BO we free the lowest level, if we free a 1GB BO we 
free the two lowest levels etc...

The problem with freeing the higher levels is that you don't know who is also 
using this. E.g. we would need to check all entries when we unmap one.

It's simply not worth it for a maximum saving of 2MB per VM.

Writing this I'm actually wondering how you ended up in this issue? There 
shouldn't be much savings from this.

Regards,
Christian.


Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924
>> Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 48 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0f4c3b2..8a480c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>   }
>> /**
>> + * amdgpu_vm_remove_ptes - free PT BOs
>> + *
>> + * @adev: amdgpu device structure
>> + * @vm: amdgpu vm structure
>> + * @start: start of mapped range
>> + * @end: end of mapped entry
>> + *
>> + * Free the page table level.
>> + */
>> +static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev,
>> +struct amdgpu_vm *vm, uint64_t start, uint64_t end)
>> +{
>> +struct amdgpu_vm_pt_cursor cursor;
>> +unsigned shift, num_entries;
>> +
>> +amdgpu_vm_pt_start(adev, vm, start, &cursor);
>> +while (cursor.level < AMDGPU_VM_PTB) {
>> +if (!amdgpu_vm_pt_descendant(adev, &cursor))
>> +return -ENOENT;
>> +}
>> +
>> +while (cursor.pfn < end) {
>> +amdgpu_vm_free_table(cursor.entry);
>> +num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1);
>> +
>> +if (cursor.entry != &cursor.parent->entries[num_entries - 1]) {
>> +/* Next ptb entry */
>> +shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> +cursor.pfn += 1ULL << shift;
>> +cursor.pfn &= ~((1ULL << shift) - 1);
>> +cursor.entry++;
>> +} else {
>> +/* Next ptb entry in next pd0 entry */
>> +amdgpu_vm_pt_ancestor(&cursor);
>> +shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>> +cursor.pfn += 1ULL << shift;
>> +cursor.pfn &= ~((1ULL << shift) - 1);
>> +amdgpu_vm_pt_descendant(adev, &cursor);
>> +}
>> +

[PATCH] drm/amdgpu: remove PT BOs when unmapping

2019-10-29 Thread Huang, JinHuiEric
The issue is that PT BOs are not freed when unmapping a VA,
which causes huge accumulated vram usage in some
memory stress tests, such as the kfd big buffer stress test.
Function amdgpu_vm_bo_update_mapping() is called by both
amdgpu_vm_bo_update() and amdgpu_vm_clear_freed(). The
solution is to replace amdgpu_vm_bo_update_mapping() in
amdgpu_vm_clear_freed() with a function that removes PT BOs,
to save vram usage.

Change-Id: Ic24e35bff8ca85265b418a642373f189d972a924
Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0f4c3b2..8a480c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1930,6 +1930,51 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 }
 
 /**
+ * amdgpu_vm_remove_ptes - free PT BOs
+ *
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ * @start: start of mapped range
+ * @end: end of mapped entry
+ *
+ * Free the page table level.
+ */
+static int amdgpu_vm_remove_ptes(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm, uint64_t start, uint64_t end)
+{
+   struct amdgpu_vm_pt_cursor cursor;
+   unsigned shift, num_entries;
+
+   amdgpu_vm_pt_start(adev, vm, start, &cursor);
+   while (cursor.level < AMDGPU_VM_PTB) {
+   if (!amdgpu_vm_pt_descendant(adev, &cursor))
+   return -ENOENT;
+   }
+
+   while (cursor.pfn < end) {
+   amdgpu_vm_free_table(cursor.entry);
+   num_entries = amdgpu_vm_num_entries(adev, cursor.level - 1);
+
+   if (cursor.entry != &cursor.parent->entries[num_entries - 1]) {
+   /* Next ptb entry */
+   shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
+   cursor.pfn += 1ULL << shift;
+   cursor.pfn &= ~((1ULL << shift) - 1);
+   cursor.entry++;
+   } else {
+   /* Next ptb entry in next pd0 entry */
+   amdgpu_vm_pt_ancestor(&cursor);
+   shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
+   cursor.pfn += 1ULL << shift;
+   cursor.pfn &= ~((1ULL << shift) - 1);
+   amdgpu_vm_pt_descendant(adev, &cursor);
+   }
+   }
+
+   return 0;
+}
+
+/**
  * amdgpu_vm_clear_freed - clear freed BOs in the PT
  *
  * @adev: amdgpu_device pointer
@@ -1949,7 +1994,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  struct dma_fence **fence)
 {
struct amdgpu_bo_va_mapping *mapping;
-   uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
int r;
 
@@ -1958,13 +2002,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
 
-   if (vm->pte_support_ats &&
-   mapping->start < AMDGPU_GMC_HOLE_START)
-   init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
+   r = amdgpu_vm_remove_ptes(adev, vm,
+   (mapping->start + 0x1ff) & (~0x1ffll),
+   (mapping->last + 1) & (~0x1ffll));
 
-   r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
-   mapping->start, mapping->last,
-   init_pte_value, 0, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -1980,7 +2021,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
}
 
return 0;
-
 }
 
 /**
-- 
2.7.4

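One detail worth calling out in the amdgpu_vm_clear_freed() hunk above: the 0x1ff masking trims the freed range inward to 512-page boundaries (2MB with 4 KB pages), so only page tables fully covered by the unmapped range are walked. A small self-contained check of the mask arithmetic, using made-up page frame numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* mapping->start/last are page frame numbers; 0x200 pfns = 2 MB. */
        uint64_t start = 0x12345, last = 0x177ff; /* hypothetical pfns */

        /* Round start up, and (last + 1) down, to a 512-pfn boundary. */
        uint64_t lo = (start + 0x1ff) & ~0x1ffULL;
        uint64_t hi = (last + 1) & ~0x1ffULL;

        assert(lo == 0x12400); /* 0x12345 rounded up */
        assert(hi == 0x17800); /* 0x17800 is already aligned */
        return 0;
}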

[PATCH] drm/amdgpu: fix gfx ib test failed in sriov

2019-08-28 Thread Huang, JinHuiEric
It partially reverts the regression of

commit e4a67e6cf14c258619f
("drm/amdgpu/psp: move TMR to cpu invisible vram region")

which causes the gfx IB test to fail during driver loading
on SR-IOV systems.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 ++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  1 +
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 9f7cc5b..9f91ced 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -261,9 +261,14 @@ static int psp_tmr_init(struct psp_context *psp)
}
}
 
-   ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->tmr_bo, &psp->tmr_mc_addr, NULL);
+   if (amdgpu_sriov_vf(psp->adev))
+   ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+   else
+   ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr, NULL);
 
return ret;
 }
@@ -1216,7 +1221,10 @@ static int psp_hw_fini(void *handle)
 
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
-   amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, NULL);
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+   else
+   amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, NULL);
amdgpu_bo_free_kernel(&psp->fw_pri_bo,
  &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index bc0947f..b73d4aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -171,6 +171,7 @@ struct psp_context
/* tmr buffer */
struct amdgpu_bo*tmr_bo;
uint64_ttmr_mc_addr;
+   void*tmr_buf;
 
/* asd firmware and buffer */
const struct firmware   *asd_fw;
-- 
2.7.4


Re: [PATCH] drm/amdkfd: fix cp hang in eviction

2019-07-11 Thread Huang, JinHuiEric
ping.

On 2019-07-10 11:20 a.m., Huang, JinHuiEric wrote:
> The CP hang occurs in the OCL conformance test only on a Supermicro
> platform which has 40 cores, where the test generates 40 threads.
> The root cause is a race condition on non-protected flags.
>
> The fix is to move the is_evicted and is_active (init_mqd()) flag
> updates into the protected area.
>
> Signed-off-by: Eric Huang 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +++++++++-------
>   1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 9ffdda5..f23e17b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   
>   mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
>   q->properties.type)];
> - /*
> -  * Eviction state logic: mark all queues as evicted, even ones
> -  * not currently active. Restoring inactive queues later only
> -  * updates the is_evicted flag but is a no-op otherwise.
> -  */
> - q->properties.is_evicted = !!qpd->evicted;
> +
>   if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
>   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
>   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> @@ -1173,9 +1168,16 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   retval = -ENOMEM;
>   goto out_deallocate_doorbell;
>   }
> +
> + dqm_lock(dqm);
> + /*
> +  * Eviction state logic: mark all queues as evicted, even ones
> +  * not currently active. Restoring inactive queues later only
> +  * updates the is_evicted flag but is a no-op otherwise.
> +  */
> + q->properties.is_evicted = !!qpd->evicted;
>   mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
>   &q->gart_mqd_addr, &q->properties);
> - dqm_lock(dqm);
>   
>   list_add(&q->list, &qpd->queues_list);
>   qpd->queue_count++;

Recall: [PATCH] drm/amdkfd: fix cp hang in eviction

2019-07-10 Thread Huang, JinHuiEric
Huang, JinHuiEric would like to recall the message, "[PATCH] drm/amdkfd: fix cp 
hang in eviction".

[PATCH] drm/amdkfd: fix cp hang in eviction

2019-07-10 Thread Huang, JinHuiEric
The CP hang occurs in the OCL conformance test only on a Supermicro
platform which has 40 cores, where the test generates 40 threads.
The root cause is a race condition on non-protected flags.

The fix is to move the is_evicted and is_active (init_mqd()) flag
updates into the protected area.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 9ffdda5..f23e17b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
-   /*
-* Eviction state logic: mark all queues as evicted, even ones
-* not currently active. Restoring inactive queues later only
-* updates the is_evicted flag but is a no-op otherwise.
-*/
-   q->properties.is_evicted = !!qpd->evicted;
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
@@ -1173,9 +1168,16 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
+
+   dqm_lock(dqm);
+   /*
+* Eviction state logic: mark all queues as evicted, even ones
+* not currently active. Restoring inactive queues later only
+* updates the is_evicted flag but is a no-op otherwise.
+*/
+   q->properties.is_evicted = !!qpd->evicted;
mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
-   dqm_lock(dqm);
 
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
-- 
2.7.4


[PATCH] drm/amdkfd: fix cp hang in eviction

2019-07-10 Thread Huang, JinHuiEric
The CP hang occurs in the OCL conformance test only on a Supermicro
platform which has 40 cores, where the test generates 40 threads.
The root cause is a race condition on non-protected flags.

The fix is to move the is_evicted and is_active (init_mqd()) flag
updates into the protected area.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 9ffdda5..535c981 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
-   /*
-* Eviction state logic: mark all queues as evicted, even ones
-* not currently active. Restoring inactive queues later only
-* updates the is_evicted flag but is a no-op otherwise.
-*/
-   q->properties.is_evicted = !!qpd->evicted;
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
@@ -1173,9 +1168,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
+
+   dqm_lock(dqm);
+   /*
+* Eviction state logic: mark all queues as evicted, even ones
+* not currently active. Restoring inactive queues later only
+* updates the is_evicted flag but is a no-op otherwise.
+*/
+   q->properties.is_evicted = !!qpd->evicted;
+   q->properties.is_suspended = false;
mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
-   dqm_lock(dqm);
 
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
-- 
2.7.4

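The race the two revisions above close is a classic check-then-act: q->properties.is_evicted was sampled from qpd->evicted before dqm_lock was taken, so a concurrent eviction or restore could slip in between the sample and the queue being published. A minimal, hypothetical pthreads sketch of the shape of the bug and of the fix (not the actual KFD types):

#include <pthread.h>
#include <stdbool.h>

struct queue_props { bool is_evicted; };

static pthread_mutex_t dqm_lock = PTHREAD_MUTEX_INITIALIZER;
static int qpd_evicted; /* eviction refcount, written under dqm_lock */

/* Buggy shape: the eviction state is sampled before taking the lock,
 * so it can already be stale when the queue is published. */
static void create_queue_racy(struct queue_props *q)
{
        q->is_evicted = qpd_evicted != 0; /* unprotected read */
        pthread_mutex_lock(&dqm_lock);
        /* ... init MQD from q, add queue to the list ... */
        pthread_mutex_unlock(&dqm_lock);
}

/* Fixed shape, as in the patch: sample and consume the flag inside the
 * same critical section that publishes the queue. */
static void create_queue_fixed(struct queue_props *q)
{
        pthread_mutex_lock(&dqm_lock);
        q->is_evicted = qpd_evicted != 0;
        /* ... init MQD from q, add queue to the list ... */
        pthread_mutex_unlock(&dqm_lock);
}

int main(void)
{
        struct queue_props q;

        create_queue_fixed(&q);
        (void)create_queue_racy;
        return q.is_evicted;
}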

Re: [PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed

2019-03-06 Thread Huang, JinHuiEric
Oops. I submitted it moments ago. Thanks for your catch anyway.


Eric


On 2019-03-06 11:34 a.m., William Lewis wrote:

Typo in patch description:  s/disalbed/disabled/


On 3/6/19 10:11 AM, Deucher, Alexander wrote:
Add a line like:

Fixes: 9032ea09e2d2ef ("drm/amdkfd: add RAS ECC event support")

With that added:
Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>


From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> on behalf of Huang, JinHuiEric <jinhuieric.hu...@amd.com>
Sent: Wednesday, March 6, 2019 10:59 AM
To: amd-gfx@lists.freedesktop.org
Cc: Huang, JinHuiEric
Subject: [PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed

It fixes a compiling error introduced by commit
9032ea09e2d2ef0d10e5cd793713bf2eb21643c5
drm/amdkfd: add RAS ECC event support

Change-Id: I8792767726e28eed3a3fcf9072f608701be13c79
Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fe1d736..acf8ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -640,4 +640,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
 }
+
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
 #endif
--
2.7.4


[PATCH] drm/amdkfd: fix a compiling error when CONFIG_HSA_AMD disalbed

2019-03-06 Thread Huang, JinHuiEric
It fixes a compiling error introduced by commit
9032ea09e2d2ef0d10e5cd793713bf2eb21643c5
drm/amdkfd: add RAS ECC event support

Change-Id: I8792767726e28eed3a3fcf9072f608701be13c79
Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fe1d736..acf8ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -640,4 +640,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
 }
+
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
 #endif
-- 
2.7.4

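The fix follows the usual kernel pattern for config-gated entry points: when CONFIG_HSA_AMD is off, the kgd2kfd_*() hooks are kept as empty stubs so the rest of amdgpu can call them unconditionally. A hedged, header-style sketch of the general shape (not the actual amdgpu headers):

struct kfd_dev; /* opaque to callers */

#ifdef CONFIG_HSA_AMD
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); /* real implementation */
#else
/* No-op stub so callers compile and link with KFD disabled. */
static inline void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
}
#endif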

Re: [PATCH] drm/powerplay: print current clock level when dpm is disabled on vg20

2019-02-19 Thread Huang, JinHuiEric
Typo "difabled" -> "disabled" in description.

With that fix it is Reviewed-by: Eric Huang 

On 2019-02-19 4:09 p.m., Liu, Shaoyun wrote:
> When DPM for the specific clock is difabled, driver should still print out
> current clock info for rocm-smi support on vega20
>
> Change-Id: I8669c77bf153caa2cd63a575802eb58747151239
> Signed-off-by: shaoyunl 
> ---
>   drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 56 ++++++++++++++++++++++++++++----------------------------
>   1 file changed, 28 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index aad79aff..c95e0f3 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -2641,9 +2641,8 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.gfx_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled,
> - "[GetSclks]: gfxclk dpm not enabled!\n",
> - return -EPERM);
> + if (!data->smu_features[GNLD_DPM_GFXCLK].enabled)
> + return -1;
>   
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
> @@ -2670,9 +2669,8 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.mem_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled,
> - "[GetMclks]: uclk dpm not enabled!\n",
> - return -EPERM);
> + if (!data->smu_features[GNLD_DPM_UCLK].enabled)
> + return -1;
>   
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = data->mclk_latency_table.count = count;
> @@ -2696,9 +2694,8 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.dcef_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled,
> - "[GetDcfclocks]: dcefclk dpm not enabled!\n",
> - return -EPERM);
> + if (!data->smu_features[GNLD_DPM_DCEFCLK].enabled)
> + return -1;
>   
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
> @@ -2719,9 +2716,8 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.soc_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled,
> - "[GetSocclks]: socclk dpm not enabled!\n",
> - return -EPERM);
> + if (!data->smu_features[GNLD_DPM_SOCCLK].enabled)
> + return -1;
>   
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
> @@ -3137,10 +3133,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
>   "Attempt to get current gfx clk Failed!",
>   return ret);
>   
> - ret = vega20_get_sclks(hwmgr, &clocks);
> - PP_ASSERT_WITH_CODE(!ret,
> - "Attempt to get gfx clk levels Failed!",
> - return ret);
> + if (vega20_get_sclks(hwmgr, &clocks)) {
> + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
> + now / 100);
> + break;
> + }
>   
>   for (i = 0; i < clocks.num_levels; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n",
> @@ -3154,10 +3151,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
>   "Attempt to get current mclk freq Failed!",
>   return ret);
>   
> - ret = vega20_get_memclocks(hwmgr, &clocks);
> - PP_ASSERT_WITH_CODE(!ret,
> - "Attempt to get memory clk levels Failed!",
> - return ret);
> + if (vega20_get_memclocks(hwmgr, &clocks)) {
> + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
> + now / 100);
> + break;
> + }
>   
>   for (i = 0; i < clocks.num_levels; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n",
> @@ -3171,10 +3169,11 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
>   "Attempt to get current socclk freq Failed!",
>   return ret);
>   
> - ret = vega20_get_socclocks(hwmgr, &clocks);
> - PP_ASS
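For illustration, here is what the DPM-disabled fallback above produces, assuming now is reported in 10 kHz units as is usual in powerplay (so now / 100 yields MHz); the value is made up:

#include <stdio.h>

int main(void)
{
        unsigned int now = 135000; /* hypothetical current clock, 10 kHz units */
        char buf[64];
        int size = 0;

        /* Same format string as the patch's DPM-disabled fallback branch. */
        size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", now / 100);
        fputs(buf, stdout); /* prints: 0: 1350Mhz * (DPM disabled) */
        return 0;
}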

Re: [PATCH] drm/powerplay: Get fix clock info when dpm is disabled for the clock

2019-02-19 Thread Huang, JinHuiEric
"rocm-smi -s" doesn't parse output from driver and it directly display 
output, so it will not break rocm-smi.

Regards,

Eric

On 2019-02-19 2:51 p.m., Kuehling, Felix wrote:
> [+Kent]
>
> On 2019-02-19 12:08 p.m., Huang, JinHuiEric wrote:
>
>> It seems redundant to print out all DPM levels when DPM is disabled.
>> It probably looks more meaningful to print out "<current clock>Mhz (DPM
>> disabled)".
> If you change the format, you risk breaking tools such as rocm-smi. Is
> there some other way for rocm-smi to tell that DPM is disabled?
>
> Regards,
>     Felix
>
>
>> Regards,
>>
>> Eric
>>
>> On 2019-02-15 4:25 p.m., Liu, Shaoyun wrote:
>>> When DPM for the specific clock is difabled, driver should still able to
>>> get fix clock info from the pptable
>>>
>>> Change-Id: Ic609203b3b87aa75b0cfd57b57717b3bb89daf48
>>> Signed-off-by: shaoyunl 
>>> ---
>>> drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 ----------------
>>> 1 file changed, 16 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>>> index aad79aff..2eae0b4 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>>> @@ -2641,10 +2641,6 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
>>> struct vega20_single_dpm_table *dpm_table = 
>>> &(data->dpm_table.gfx_table);
>>> int i, count;
>>> 
>>> -   PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled,
>>> -   "[GetSclks]: gfxclk dpm not enabled!\n",
>>> -   return -EPERM);
>>> -
>>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
>>> dpm_table->count;
>>> clocks->num_levels = count;
>>> 
>>> @@ -2670,10 +2666,6 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
>>> struct vega20_single_dpm_table *dpm_table = 
>>> &(data->dpm_table.mem_table);
>>> int i, count;
>>> 
>>> -   PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled,
>>> -   "[GetMclks]: uclk dpm not enabled!\n",
>>> -   return -EPERM);
>>> -
>>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
>>> dpm_table->count;
>>> clocks->num_levels = data->mclk_latency_table.count = count;
>>> 
>>> @@ -2696,10 +2688,6 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
>>> struct vega20_single_dpm_table *dpm_table = 
>>> &(data->dpm_table.dcef_table);
>>> int i, count;
>>> 
>>> -   PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled,
>>> -   "[GetDcfclocks]: dcefclk dpm not enabled!\n",
>>> -   return -EPERM);
>>> -
>>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
>>> dpm_table->count;
>>> clocks->num_levels = count;
>>> 
>>> @@ -2719,10 +2707,6 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
>>> struct vega20_single_dpm_table *dpm_table = 
>>> &(data->dpm_table.soc_table);
>>> int i, count;
>>> 
>>> -   PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled,
>>> -   "[GetSocclks]: socclk dpm not enabled!\n",
>>> -   return -EPERM);
>>> -
>>> count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
>>> dpm_table->count;
>>> clocks->num_levels = count;
>>> 

Re: [PATCH] drm/powerplay: Get fix clock info when dpm is disabled for the clock

2019-02-19 Thread Huang, JinHuiEric
It seems redundant to print out all DPM levels when DPM is disabled.
It probably looks more meaningful to print out "<current clock>Mhz (DPM
disabled)".

Regards,

Eric

On 2019-02-15 4:25 p.m., Liu, Shaoyun wrote:
> When DPM for the specific clock is difabled, driver should still able to
> get fix clock info from the pptable
>
> Change-Id: Ic609203b3b87aa75b0cfd57b57717b3bb89daf48
> Signed-off-by: shaoyunl 
> ---
>   drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 ----------------
>   1 file changed, 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index aad79aff..2eae0b4 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -2641,10 +2641,6 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.gfx_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_GFXCLK].enabled,
> - "[GetSclks]: gfxclk dpm not enabled!\n",
> - return -EPERM);
> -
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
>   
> @@ -2670,10 +2666,6 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.mem_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_UCLK].enabled,
> - "[GetMclks]: uclk dpm not enabled!\n",
> - return -EPERM);
> -
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = data->mclk_latency_table.count = count;
>   
> @@ -2696,10 +2688,6 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.dcef_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_DCEFCLK].enabled,
> - "[GetDcfclocks]: dcefclk dpm not enabled!\n",
> - return -EPERM);
> -
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
>   
> @@ -2719,10 +2707,6 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
>   struct vega20_single_dpm_table *dpm_table = 
> &(data->dpm_table.soc_table);
>   int i, count;
>   
> - PP_ASSERT_WITH_CODE(data->smu_features[GNLD_DPM_SOCCLK].enabled,
> - "[GetSocclks]: socclk dpm not enabled!\n",
> - return -EPERM);
> -
>   count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : 
> dpm_table->count;
>   clocks->num_levels = count;
>   

Re: [PATCH v2 2/2] drm/amd/powerplay: add override pcie parameters for Vega20 (v2)

2019-02-06 Thread Huang, JinHuiEric
Reviewed-by: Eric Huang 

On 2019-02-05 4:37 p.m., Kasiviswanathan, Harish wrote:
> v2: Fix SMU message format
>  Send override message after SMU enable features
>
> Change-Id: Ib880c83bc7aa12be370cf6619acfe66e12664c9c
> Signed-off-by: Harish Kasiviswanathan 
> ---
>   drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 45 +++++++++++++++++++++++++++------------------
>   1 file changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index da022ca..e1b1656 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -771,40 +771,49 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
>   return 0;
>   }
>   
> +/*
> + * Override PCIe link speed and link width for DPM Level 1. PPTable entries
> + * reflect the ASIC capabilities and not the system capabilities, e.g. a
> + * Vega20 board in a PCI Gen3 system. In this case, when the SMU tries to
> + * switch to DPM1, it fails as the system doesn't support Gen4.
> + */
>   static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
>   {
>   struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
> - uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg;
> + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg;
>   int ret;
>   
>   if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
> - pcie_speed = 16;
> + pcie_gen = 3;
>   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
> - pcie_speed = 8;
> + pcie_gen = 2;
>   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
> - pcie_speed = 5;
> + pcie_gen = 1;
>   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
> - pcie_speed = 2;
> + pcie_gen = 0;
>   
>   if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32)
> - pcie_width = 32;
> + pcie_width = 7;
>   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
> - pcie_width = 16;
> + pcie_width = 6;
>   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
> - pcie_width = 12;
> + pcie_width = 5;
>   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
> - pcie_width = 8;
> - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
>   pcie_width = 4;
> + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
> + pcie_width = 3;
>   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
>   pcie_width = 2;
>   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
>   pcie_width = 1;
>   
> - pcie_arg = pcie_width | (pcie_speed << 8);
> -
> + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1
> +  * Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
> +  * Bit 7:0:   PCIE lane width, 1 to 7 corresponds to x1 to x32
> +  */
> + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width;
>   ret = smum_send_msg_to_smc_with_parameter(hwmgr,
> - PPSMC_MSG_OverridePcieParameters, pcie_arg);
> + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg);
>   PP_ASSERT_WITH_CODE(!ret,
>   "[OverridePcieParameters] Attempt to override pcie params 
> failed!",
>   return ret);
> @@ -1611,11 +1620,6 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
>   "[EnableDPMTasks] Failed to initialize SMC table!",
>   return result);
>   
> - result = vega20_override_pcie_parameters(hwmgr);
> - PP_ASSERT_WITH_CODE(!result,
> - "[EnableDPMTasks] Failed to override pcie parameters!",
> - return result);
> -
>   result = vega20_run_btc(hwmgr);
>   PP_ASSERT_WITH_CODE(!result,
>   "[EnableDPMTasks] Failed to run btc!",
> @@ -1631,6 +1635,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
>   "[EnableDPMTasks] Failed to enable all smu features!",
>   return result);
>   
> + result = vega20_override_pcie_parameters(hwmgr);
> + PP_ASSERT_WITH_CODE(!result,
> + "[EnableDPMTasks] Failed to override pcie parameters!",
> + return result);
> +
>   result = vega20_notify_smc_display_change(hwmgr);
>   PP_ASSERT_WITH_CODE(!result,
>   "[EnableDPMTasks] Failed to notify smc display change!",
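To make the encoding concrete: the v2 patch packs three fields into one dword per the comment above (bits 31:16 LCLK DPM level, bits 15:8 PCIe gen where 0..3 means Gen1..Gen4, bits 7:0 lane width where 1..7 means x1..x32). A small standalone check for a Gen3 x16 system at LCLK DPM level 1:

#include <assert.h>
#include <stdint.h>

static uint32_t smu_pcie_arg(uint32_t dpm_level, uint32_t gen, uint32_t width)
{
        /* Bits 31:16 LCLK DPM level, 15:8 PCIE gen (0..3 = Gen1..Gen4),
         * 7:0 lane width (1..7 = x1..x32), as in the patch's comment. */
        return (dpm_level << 16) | (gen << 8) | width;
}

int main(void)
{
        /* Gen3 (encoded 2), x16 (encoded 6), DPM level 1 -> 0x10206. */
        assert(smu_pcie_arg(1, 2, 6) == 0x10206);
        return 0;
}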


[PATCH] drm/amd/powerplay: add override pcie parameters for Vega20

2019-01-25 Thread Huang, JinHuiEric
It is to solve an RDMA performance issue.

Change-Id: I441d2943e504e2ef7d33de0e8773a0f9b8fdb2ca
Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 5085b36..7e59bc8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -771,6 +771,47 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
return 0;
 }
 
+static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+   uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg;
+   int ret;
+
+   if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
+   pcie_speed = 16;
+   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+   pcie_speed = 8;
+   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+   pcie_speed = 5;
+   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
+   pcie_speed = 2;
+
+   if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32)
+   pcie_width = 32;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+   pcie_width = 16;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
+   pcie_width = 12;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
+   pcie_width = 8;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+   pcie_width = 4;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
+   pcie_width = 2;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
+   pcie_width = 1;
+
+   pcie_arg = pcie_width | (pcie_speed << 8);
+
+   ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+   PPSMC_MSG_OverridePcieParameters, pcie_arg);
+   PP_ASSERT_WITH_CODE(!ret,
+   "[OverridePcieParameters] Attempt to override pcie params 
failed!",
+   return ret);
+
+   return 0;
+}
+
 static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr)
 {
struct vega20_hwmgr *data =
@@ -1570,6 +1611,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
"[EnableDPMTasks] Failed to initialize SMC table!",
return result);
 
+   result = vega20_override_pcie_parameters(hwmgr);
+   PP_ASSERT_WITH_CODE(!result,
+   "[EnableDPMTasks] Failed to override pcie parameters!",
+   return result);
+
result = vega20_run_btc(hwmgr);
PP_ASSERT_WITH_CODE(!result,
"[EnableDPMTasks] Failed to run btc!",
-- 
2.7.4
