Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v3

2019-03-12 Thread Kuehling, Felix
This patch is Reviewed-by: Felix Kuehling 

Regards,
   Felix

On 3/12/2019 9:17 PM, Yang, Philip wrote:
> userptr may cross two VMAs if a forked child process (which does not
> call exec after fork) mallocs a buffer, frees it, and then mallocs a
> larger buffer: the kernel creates a new VMA adjacent to the old VMA
> that was cloned from the parent process, so some pages of the userptr
> are in the first VMA and the rest are in the second VMA.
>
> HMM expects a range to span only one VMA, so loop over all VMAs in the
> address range and create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details.
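
To illustrate the scenario, a minimal userspace sketch (illustrative
only, not part of the patch): after fork() without exec(), a free()
followed by a larger malloc() can leave the child with a buffer that
spans two adjacent anonymous VMAs, because the new VMA cannot be merged
with the VMA cloned from the parent.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
	char *buf = malloc(64 * 4096);	/* backing VMA is cloned at fork */

	if (fork() == 0) {		/* child that never calls exec */
		free(buf);
		/* may span the old VMA plus a new adjacent VMA */
		buf = malloc(128 * 4096);
		/* registering buf as a userptr BO now hits the
		 * multi-VMA case handled by this patch */
		printf("child buffer at %p\n", (void *)buf);
		_exit(0);
	}
	wait(NULL);
	return 0;
}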
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +---
>   1 file changed, 91 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c1240bf243ba..c14198737dcd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_rangerange;
> + struct hmm_range*ranges;
> + int nr_ranges;
>   #endif
>   };
>   
> @@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {
>* once afterwards to stop HMM tracking
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS  (16)
> +
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct hmm_range *range = &gtt->range;
> - int r = 0, i;
> + unsigned long start = gtt->userptr;
> + unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *ranges;
> + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> + uint64_t *pfns, f;
> + int r = 0, i, nr_pages;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - amdgpu_hmm_init_range(range);
> -
>   down_read(&mm->mmap_sem);
>   
> - range->vma = find_vma(mm, gtt->userptr);
> - if (!range_in_vma(range->vma, gtt->userptr, end))
> - r = -EFAULT;
> - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - range->vma->vm_file)
> + /* user pages may cross multiple VMAs */
> + gtt->nr_ranges = 0;
> + do {
> + unsigned long vm_start;
> +
> + if (gtt->nr_ranges >= MAX_NR_VMAS) {
> + DRM_ERROR("Too many VMAs in userptr range\n");
> + r = -EFAULT;
> + goto out;
> + }
> +
> + vm_start = vma ? vma->vm_end : start;
> + vma = find_vma(mm, vm_start);
> + if (unlikely(!vma || vm_start < vma->vm_start)) {
> + r = -EFAULT;
> + goto out;
> + }
> + vmas[gtt->nr_ranges++] = vma;
> + } while (end > vma->vm_end);
> +
> + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> + start, gtt->nr_ranges, ttm->num_pages);
> +
> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + vmas[0]->vm_file)) {
>   r = -EPERM;
> - if (r)
>   goto out;
> + }
>   
> - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -  GFP_KERNEL);
> - if (range->pfns == NULL) {
> + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> + if (unlikely(!ranges)) {
>   r = -ENOMEM;
>   goto out;
>   }
> - range->start = gtt->userptr;
> - range->end = end;
>   
> - range->pfns[0] = range->flags[HMM_PFN_VALID];
> - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> - 0 : range->flags[HMM_PFN_WRITE];
> - for (i = 1; i < ttm->num_pages; i++)
> - range->pfns[i] = range->pfns[0];
> + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + r = -ENOMEM;
> + goto out_free_ranges;
> + }
> +
> + for (i = 0; i < gtt->nr_ranges; i++)
> + amdgpu_hmm_init_range(&ranges[i]);
> +
> + f = ranges[0].flags[HMM_PFN_VALID];
> + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : ranges[0].flags[HMM_PFN_WRITE];
> + memset64(pfns, f, ttm->num_pages);
> +
> + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> + ranges[i].vma = vmas[i];
> + ranges[i].start = max(start, vmas[i]->vm_start);
> + ranges[i].end = min(end, vmas[i]->vm_end);

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v2

2019-03-12 Thread Yang, Philip
Hi Felix,

Submitted v3 to fix the potential problems with invalid userptr.

Philip

On 2019-03-12 3:30 p.m., Kuehling, Felix wrote:
> See one comment inline. There are still some potential problems that
> you're not catching.
> 
> On 2019-03-06 9:42 p.m., Yang, Philip wrote:
>> userptr may cross two VMAs if a forked child process (which does not
>> call exec after fork) mallocs a buffer, frees it, and then mallocs a
>> larger buffer: the kernel creates a new VMA adjacent to the old VMA
>> that was cloned from the parent process, so some pages of the userptr
>> are in the first VMA and the rest are in the second VMA.
>>
>> HMM expects a range to span only one VMA, so loop over all VMAs in the
>> address range and create multiple ranges to handle this case. See
>> is_mergeable_anon_vma in mm/mmap.c for details.
>>
>> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
>> Signed-off-by: Philip Yang 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 123 +---
>>1 file changed, 88 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 7cc0ba24369d..802bec7ef917 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>>  struct task_struct  *usertask;
>>  uint32_tuserflags;
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> -struct hmm_rangerange;
>> +struct hmm_range*ranges;
>> +int nr_ranges;
>>#endif
>>};
>>
>> @@ -723,62 +724,105 @@ struct amdgpu_ttm_tt {
>> * once afterwards to stop HMM tracking
>> */
>>#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>> +
>> +/* Support Userptr pages cross max 16 vmas */
>> +#define MAX_NR_VMAS (16)
>> +
>>int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>{
>>  struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>  struct mm_struct *mm = gtt->usertask->mm;
>> -unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> -struct hmm_range *range = &gtt->range;
>> -int r = 0, i;
>> +unsigned long start = gtt->userptr;
>> +unsigned long end = start + ttm->num_pages * PAGE_SIZE;
>> +struct hmm_range *ranges;
>> +struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
>> +uint64_t *pfns, f;
>> +int r = 0, i, nr_pages;
>>
>>  if (!mm) /* Happens during process shutdown */
>>  return -ESRCH;
>>
>> -amdgpu_hmm_init_range(range);
>> -
>>  down_read(&mm->mmap_sem);
>>
>> -range->vma = find_vma(mm, gtt->userptr);
>> -if (!range_in_vma(range->vma, gtt->userptr, end))
>> -r = -EFAULT;
>> -else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> -range->vma->vm_file)
>> +/* user pages may cross multiple VMAs */
>> +gtt->nr_ranges = 0;
>> +do {
>> +if (gtt->nr_ranges >= MAX_NR_VMAS) {
>> +DRM_ERROR("Too many VMAs in userptr range\n");
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +
>> +vma = find_vma(mm, vma ? vma->vm_end : start);
> 
> You need a check here that vma->vm_start <= the requested start address.
> Otherwise you can end up with gaps in your userptr mapping that don't
> have valid pages.
> 
> Regards,
>     Felix
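
For reference, the v3 revision elsewhere in this digest adds exactly
this check before using the VMA:

	vm_start = vma ? vma->vm_end : start;
	vma = find_vma(mm, vm_start);
	if (unlikely(!vma || vm_start < vma->vm_start)) {
		r = -EFAULT;
		goto out;
	}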
> 
> 
>> +if (unlikely(!vma)) {
>> +r = -EFAULT;
>> +goto out;
>> +}
>> +vmas[gtt->nr_ranges++] = vma;
>> +} while (end > vma->vm_end);
>> +
>> +DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
>> +start, gtt->nr_ranges, ttm->num_pages);
>> +
>> +if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>> +vmas[0]->vm_file)) {
>>  r = -EPERM;
>> -if (r)
>>  goto out;
>> +}
>>
>> -range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> - GFP_KERNEL);
>> -if (range->pfns == NULL) {
>> +ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
>> +if (unlikely(!ranges)) {
>>  r = -ENOMEM;
>>  goto out;
>>  }
>> -range->start = gtt->userptr;
>> -range->end = end;
>>
>> -range->pfns[0] = range->flags[HMM_PFN_VALID];
>> -range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> -0 : range->flags[HMM_PFN_WRITE];
>> -for (i = 1; i < ttm->num_pages; i++)
>> -range->pfns[i] = range->pfns[0];
>> +pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
>> +if (unlikely(!pfns)) {
>> +r = -ENOMEM;
>> +goto out_free_ranges;
>> +}
>> +
>> +for (i = 0; i < gtt->nr_ranges; i++)
>> +amdgpu_hmm_init_range(&ranges[i]);
>> +
>> +f = ranges[0].flags[HMM_PFN_VALID];
>> +f |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> + 0 : ranges[0].flags[HMM_PFN_WRITE];

[PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v3

2019-03-12 Thread Yang, Philip
userptr may cross two VMAs if a forked child process (which does not
call exec after fork) mallocs a buffer, frees it, and then mallocs a
larger buffer: the kernel creates a new VMA adjacent to the old VMA
that was cloned from the parent process, so some pages of the userptr
are in the first VMA and the rest are in the second VMA.

HMM expects a range to span only one VMA, so loop over all VMAs in the
address range and create multiple ranges to handle this case. See
is_mergeable_anon_vma in mm/mmap.c for details.

Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +---
 1 file changed, 91 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c1240bf243ba..c14198737dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-   struct hmm_rangerange;
+   struct hmm_range*ranges;
+   int nr_ranges;
 #endif
 };
 
@@ -723,62 +724,108 @@ struct amdgpu_ttm_tt {
  * once afterwards to stop HMM tracking
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS (16)
+
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *range = &gtt->range;
-   int r = 0, i;
+   unsigned long start = gtt->userptr;
+   unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+   struct hmm_range *ranges;
+   struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   uint64_t *pfns, f;
+   int r = 0, i, nr_pages;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
 
-   amdgpu_hmm_init_range(range);
-
down_read(&mm->mmap_sem);
 
-   range->vma = find_vma(mm, gtt->userptr);
-   if (!range_in_vma(range->vma, gtt->userptr, end))
-   r = -EFAULT;
-   else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-   range->vma->vm_file)
+   /* user pages may cross multiple VMAs */
+   gtt->nr_ranges = 0;
+   do {
+   unsigned long vm_start;
+
+   if (gtt->nr_ranges >= MAX_NR_VMAS) {
+   DRM_ERROR("Too many VMAs in userptr range\n");
+   r = -EFAULT;
+   goto out;
+   }
+
+   vm_start = vma ? vma->vm_end : start;
+   vma = find_vma(mm, vm_start);
+   if (unlikely(!vma || vm_start < vma->vm_start)) {
+   r = -EFAULT;
+   goto out;
+   }
+   vmas[gtt->nr_ranges++] = vma;
+   } while (end > vma->vm_end);
+
+   DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+   start, gtt->nr_ranges, ttm->num_pages);
+
+   if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+   vmas[0]->vm_file)) {
r = -EPERM;
-   if (r)
goto out;
+   }
 
-   range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
-GFP_KERNEL);
-   if (range->pfns == NULL) {
+   ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+   if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
-   range->start = gtt->userptr;
-   range->end = end;
 
-   range->pfns[0] = range->flags[HMM_PFN_VALID];
-   range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
-   0 : range->flags[HMM_PFN_WRITE];
-   for (i = 1; i < ttm->num_pages; i++)
-   range->pfns[i] = range->pfns[0];
+   pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_ranges;
+   }
+
+   for (i = 0; i < gtt->nr_ranges; i++)
+   amdgpu_hmm_init_range(&ranges[i]);
+
+   f = ranges[0].flags[HMM_PFN_VALID];
+   f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+   0 : ranges[0].flags[HMM_PFN_WRITE];
+   memset64(pfns, f, ttm->num_pages);
+
+   for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+   ranges[i].vma = vmas[i];
+   ranges[i].start = max(start, vmas[i]->vm_start);
+   ranges[i].end = min(end, vmas[i]->vm_end);
+   ranges[i].pfns = pfns + nr_pages;
+   nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
+
+   r = hmm_vma_fault(&ranges[i], true);
+   if 

Re: [PATCH 1/1] drm/amdgpu: Wait for newly allocated PTs to be idle

2019-03-12 Thread Kuehling, Felix
When we use SDMA, we don't wait on the CPU. The GPU scheduler waits for 
the fences on the root PD reservation before executing the SDMA IB. 
amdgpu_vm_bo_update_mapping gets those fences and builds the sync object 
for the scheduler after all the page tables have been allocated, so it 
should be no problem.

Regards,
   Felix

On 2019-03-12 6:13 p.m., Liu, Shaoyun wrote:
> Hi,
>
> I think even when using SDMA to update PTEs we may still need to wait
> for the clear job to complete if we cannot guarantee that the clear and
> set-PTE jobs use the exact same SDMA engine (do we use a dedicated SDMA
> engine for PTE updates, including clears?). But if we didn't use the
> same engine, that may explain why the test failed occasionally.
>
> Regards
>
> shaoyun.liu
>
>
>
> On 2019-03-12 5:20 p.m., Kuehling, Felix wrote:
>> When page tables are updated by the CPU, synchronize with the
>> allocation and initialization of newly allocated page tables.
>>
>> Signed-off-by: Felix Kuehling 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +---
>>1 file changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 8603c85..4303436 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -899,17 +899,17 @@ static void amdgpu_vm_bo_param(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>}
>>
>>/**
>> - * amdgpu_vm_alloc_pts - Allocate page tables.
>> + * amdgpu_vm_alloc_pts - Allocate a specific page table
>> *
>> * @adev: amdgpu_device pointer
>> * @vm: VM to allocate page tables for
>> - * @saddr: Start address which needs to be allocated
>> - * @size: Size from start address we need.
>> + * @cursor: Which page table to allocate
>> *
>> - * Make sure the page directories and page tables are allocated
>> + * Make sure a specific page table or directory is allocated.
>> *
>> * Returns:
>> - * 0 on success, errno otherwise.
>> + * 1 if page table needed to be allocated, 0 if page table was already
>> + * allocated, negative errno if an error occurred.
>> */
>>static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>> @@ -956,7 +956,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device 
>> *adev,
>>  if (r)
>>  goto error_free_pt;
>>
>> -return 0;
>> +return 1;
>>
>>error_free_pt:
>>  amdgpu_bo_unref(&pt->shadow);
>> @@ -1621,10 +1621,12 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>  unsigned shift, parent_shift, mask;
>>  uint64_t incr, entry_end, pe_start;
>>  struct amdgpu_bo *pt;
>> +bool need_to_sync;
>>
>>  r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
>> -if (r)
>> +if (r < 0)
>>  return r;
>> +need_to_sync = (r && params->vm->use_cpu_for_update);
>>
>>  pt = cursor.entry->base.bo;
>>
>> @@ -1672,6 +1674,10 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>  entry_end += cursor.pfn & ~(entry_end - 1);
>>  entry_end = min(entry_end, end);
>>
>> +if (need_to_sync)
>> +r = amdgpu_bo_sync_wait(params->vm->root.base.bo,
>> +AMDGPU_FENCE_OWNER_VM, true);
>> +
>>  do {
>>  uint64_t upd_end = min(entry_end, frag_end);
>>  unsigned nptes = (upd_end - frag_start) >> shift;

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Kuehling, Felix
Never mind. I must have messed up my build. I can't reproduce the 
problem any more. The patch I sent out is still needed and valid. AFAICT 
it should be all that's needed to fix GPUVM for KFD.

I have not seen any faults with KFDCWSRTest.BasicTest on my system with 
Fiji or Vega10 with that patch applied.

Regards,
   Felix

On 2019-03-12 5:19 p.m., Felix Kuehling wrote:
> I'm also still seeing VM faults in the eviction test even with my fix, 
> and even with SDMA page table updates. There is still something else 
> going wrong. :/
>
> Thanks,
>   Felix
>
> On 2019-03-12 5:13 p.m., Yang, Philip wrote:
>> A VM fault happens in about 1 of 10 runs of KFDCWSRTest.BasicTest for
>> me. I am using SDMA for page table updates; I haven't tried CPU page
>> table updates.
>>
>> Philip
>>
>> On 2019-03-12 11:12 a.m., Russell, Kent wrote:
>>> Peculiar, I hit it immediately when I ran it. Can you try using
>>> --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.
>>>
>>>    Kent
>>>
 -Original Message-
 From: Christian König 
 Sent: Tuesday, March 12, 2019 11:09 AM
 To: Russell, Kent ; Koenig, Christian
 ; Kuehling, Felix ;
 amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

 Yeah, same problem here.

 I removed the libhsakmt package and installed it manually and now it
 seems to work.

 Doing some testing now, but at least off hand I can't seem to
 reproduce the VM fault on a Vega10.

 Christian.

 On 12.03.19 at 16:01, Russell, Kent wrote:
> Oh right, I remember that issue. I had that happen to me once, 
> where my
 installed libhsakmt didn't match up with the latest source code, so 
 I ended up
 having to remove the libhsakmt package and pointing it to the folders
 instead.
>     Kent
>
>> -Original Message-
>> From: Koenig, Christian
>> Sent: Tuesday, March 12, 2019 10:49 AM
>> To: Russell, Kent ; Kuehling, Felix
>> ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Yeah, the problem is I do have the libhsakmt installed.
>>
>> Going to give it a try to specify the directory directly.
>>
>> Christian.
>>
>> On 12.03.19 at 15:47, Russell, Kent wrote:
>>> The README.txt file inside the tests/kfdtest folder has 
>>> instructions
>>> on how
>> to do it if you don't have the libhsakmt package installed on 
>> your system:
>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, 
>>> the
>>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>>> LIBHSAKMT_PATH/lib respectively.
>>>
>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the 
>>> one
>> containing include, src, tests, etc), then that should cover it.
>>>  Kent
>>>
>>>
 -Original Message-
 From: Christian König 
 Sent: Tuesday, March 12, 2019 9:13 AM
 To: Russell, Kent ; Kuehling, Felix
 ; Koenig, Christian
 ; amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
 demand
 Hi guys,

 so found a few minutes today to compile kfdtest.

 Problem is that during the compile I get a lot of this:
> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> warning: undefined reference to »hsaKmtCreateQueue«
 Any idea?

 Christian.

 On 11.03.19 at 17:55, Christian König wrote:
> Hi guys,
>
> well it's most likely some missing handling in the KFD, so I'm
> rather reluctant to revert the change immediately.
>
> Problem is that I don't have time right now to look into it
> immediately. So Kent can you continue to take a look?
>
> Sounds like it's crashing immediately, so it should be something
 obvious.
> Christian.
>
> On 11.03.19 at 10:49, Russell, Kent wrote:
>>      From what I've been able to dig through, the VM Fault 
>> seems to
>> occur right after a doorbell mmap, but that's as far as I got. I
>> can try to revert it in today's merge and see how things go.
>>
>>       Kent
>>
>>> -Original Message-
>>> From: Kuehling, Felix
>>> Sent: Friday, March 08, 2019 11:16 PM
>>> To: Koenig, Christian ; Russell, Kent
>>> ; amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>> My concerns were related to eviction fence handling. It would
>>> manifest by 

Re: [PATCH 1/1] drm/amdgpu: Wait for newly allocated PTs to be idle

2019-03-12 Thread Liu, Shaoyun
Hi,

I think even when using SDMA to update PTEs we may still need to wait
for the clear job to complete if we cannot guarantee that the clear and
set-PTE jobs use the exact same SDMA engine (do we use a dedicated SDMA
engine for PTE updates, including clears?). But if we didn't use the
same engine, that may explain why the test failed occasionally.

Regards

shaoyun.liu



On 2019-03-12 5:20 p.m., Kuehling, Felix wrote:
> When page tables are updated by the CPU, synchronize with the
> allocation and initialization of newly allocated page tables.
>
> Signed-off-by: Felix Kuehling 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +---
>   1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 8603c85..4303436 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -899,17 +899,17 @@ static void amdgpu_vm_bo_param(struct amdgpu_device 
> *adev, struct amdgpu_vm *vm,
>   }
>   
>   /**
> - * amdgpu_vm_alloc_pts - Allocate page tables.
> + * amdgpu_vm_alloc_pts - Allocate a specific page table
>*
>* @adev: amdgpu_device pointer
>* @vm: VM to allocate page tables for
> - * @saddr: Start address which needs to be allocated
> - * @size: Size from start address we need.
> + * @cursor: Which page table to allocate
>*
> - * Make sure the page directories and page tables are allocated
> + * Make sure a specific page table or directory is allocated.
>*
>* Returns:
> - * 0 on success, errno otherwise.
> + * 1 if page table needed to be allocated, 0 if page table was already
> + * allocated, negative errno if an error occurred.
>*/
>   static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>  struct amdgpu_vm *vm,
> @@ -956,7 +956,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   if (r)
>   goto error_free_pt;
>   
> - return 0;
> + return 1;
>   
>   error_free_pt:
>   amdgpu_bo_unref(&pt->shadow);
> @@ -1621,10 +1621,12 @@ static int amdgpu_vm_update_ptes(struct 
> amdgpu_pte_update_params *params,
>   unsigned shift, parent_shift, mask;
>   uint64_t incr, entry_end, pe_start;
>   struct amdgpu_bo *pt;
> + bool need_to_sync;
>   
>   r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
> - if (r)
> + if (r < 0)
>   return r;
> + need_to_sync = (r && params->vm->use_cpu_for_update);
>   
>   pt = cursor.entry->base.bo;
>   
> @@ -1672,6 +1674,10 @@ static int amdgpu_vm_update_ptes(struct 
> amdgpu_pte_update_params *params,
>   entry_end += cursor.pfn & ~(entry_end - 1);
>   entry_end = min(entry_end, end);
>   
> + if (need_to_sync)
> + r = amdgpu_bo_sync_wait(params->vm->root.base.bo,
> + AMDGPU_FENCE_OWNER_VM, true);
> +
>   do {
>   uint64_t upd_end = min(entry_end, frag_end);
>   unsigned nptes = (upd_end - frag_start) >> shift;

Re: Slow memory access when using OpenCL without X11

2019-03-12 Thread Deucher, Alexander
Forcing the sclk and mclk high may impact the CPU frequency since they share 
TDP.

Alex

From: amd-gfx  on behalf of Lauri 
Ehrenpreis 
Sent: Tuesday, March 12, 2019 5:31 PM
To: Kuehling, Felix
Cc: Tom St Denis; amd-gfx@lists.freedesktop.org
Subject: Re: Slow memory access when using OpenCL without X11

However it's not only related to mclk and sclk. I tried this:
rocm-smi  --setsclk 2
rocm-smi  --setmclk 3
rocm-smi
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    34.0c  N/A     1240Mhz  1333Mhz  N/A   0%   manual  N/A     0%       0%       N/A

   End of ROCm SMI Log  


./cl_slow_test 1
got 1 platforms 1 devices
speed 3919.777100 avg 3919.777100 mbytes/s
speed 3809.373291 avg 3864.575195 mbytes/s
speed 585.796814 avg 2771.649170 mbytes/s
speed 188.721848 avg 2125.917236 mbytes/s
speed 188.916367 avg 1738.517090 mbytes/s

So despite forcing max sclk and mclk the memory speed is still slow..

--
Lauri


On Tue, Mar 12, 2019 at 11:21 PM Lauri Ehrenpreis 
mailto:lauri...@gmail.com>> wrote:
IN the case when memory is slow, the rocm-smi outputs this:
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    30.0c  N/A     400Mhz   933Mhz   N/A   0%   auto    N/A     0%       0%       N/A

   End of ROCm SMI Log  


normal memory speed case gives following:
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    35.0c  N/A     400Mhz   1200Mhz  N/A   0%   auto    N/A     0%       0%       N/A

   End of ROCm SMI Log  


So there is a difference in MCLK - can this cause such a huge slowdown?

--
Lauri

On Tue, Mar 12, 2019 at 6:39 PM Kuehling, Felix 
mailto:felix.kuehl...@amd.com>> wrote:
[adding the list back]

I'd suspect a problem related to memory clock. This is an APU where
system memory is shared with the CPU, so if the SMU changes memory
clocks that would affect CPU memory access performance. If the problem
only occurs when OpenCL is running, then the compute power profile could
have an effect here.

Lauri, can you monitor the clocks during your tests using rocm-smi?

Regards,
   Felix
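
As a lighter-weight alternative to polling rocm-smi for the memory
clock, a small monitor sketch (assumes the standard amdgpu sysfs node
pp_dpm_mclk; the active DPM level is marked with '*'):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char line[128];

	for (;;) {	/* stop with Ctrl-C */
		FILE *f = fopen("/sys/class/drm/card0/device/pp_dpm_mclk", "r");

		if (!f) {
			perror("pp_dpm_mclk");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* one line per DPM level */
		fclose(f);
		puts("--");
		sleep(1);
	}
}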

On 2019-03-11 1:15 p.m., Tom St Denis wrote:
> Hi Lauri,
>
> I don't have ROCm installed locally (not on that team at AMD) but I
> can rope in some of the KFD folk and see what they say :-).
>
> (in the mean time I should look into installing the ROCm stack on my
> Ubuntu disk for experimentation...).
>
> Only other thing that comes to mind is some sort of stutter due to
> power/clock gating (or gfx off/etc).  But that typically affects the
> display/gpu side not the CPU side.
>
> Felix:  Any known issues with Raven and ROCm interacting over memory
> bus performance?
>
> Tom
>
> On Mon, Mar 11, 2019 at 12:56 PM Lauri Ehrenpreis 
> mailto:lauri...@gmail.com>
> >> wrote:
>
> Hi!
>
> The 100x memory slowdown is hard to believe indeed. I attached the
> test program with my first e-mail which depends only on
> rocm-opencl-dev package. Would you mind compiling it and checking
> if it slows down memory for you as well?
>
> steps:
> 1) g++ cl_slow_test.cpp -o cl_slow_test -I
> /opt/rocm/opencl/include/ -L /opt/rocm/opencl/lib/x86_64/  -lOpenCL
> 2) logout from desktop env and disconnect hdmi/diplayport etc
> 3) log in over ssh
> 4) run the program ./cl_slow_test 1
>
> For me 

Re: Slow memory access when using OpenCL without X11

2019-03-12 Thread Lauri Ehrenpreis
However it's not only related to mclk and sclk. I tried this:
rocm-smi  --setsclk 2
rocm-smi  --setmclk 3
rocm-smi
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    34.0c  N/A     1240Mhz  1333Mhz  N/A   0%   manual  N/A     0%       0%       N/A

   End of ROCm SMI Log


./cl_slow_test 1
got 1 platforms 1 devices
speed 3919.777100 avg 3919.777100 mbytes/s
speed 3809.373291 avg 3864.575195 mbytes/s
speed 585.796814 avg 2771.649170 mbytes/s
speed 188.721848 avg 2125.917236 mbytes/s
speed 188.916367 avg 1738.517090 mbytes/s

So despite forcing max sclk and mclk the memory speed is still slow..

--
Lauri
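
For readers without the original attachment, a minimal reconstruction of
that kind of test (hypothetical; the real cl_slow_test.cpp was attached
to the first mail in this thread and needs only rocm-opencl-dev). It
creates an OpenCL context and then times plain host memcpy while the
context is alive:

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define BUF_SIZE (64u * 1024u * 1024u)

int main(void)
{
	cl_platform_id platform;
	cl_device_id device;
	cl_uint nplat = 0, ndev = 0;
	cl_int err;

	if (clGetPlatformIDs(1, &platform, &nplat) != CL_SUCCESS)
		return 1;
	if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device,
			   &ndev) != CL_SUCCESS)
		return 1;
	printf("got %u platforms %u devices\n", nplat, ndev);

	/* merely holding a context open reproduces the slowdown */
	cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
	if (err != CL_SUCCESS)
		return 1;

	char *src = (char *)malloc(BUF_SIZE);
	char *dst = (char *)malloc(BUF_SIZE);
	memset(src, 1, BUF_SIZE);

	for (int i = 0; i < 5; i++) {
		struct timespec t0, t1;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		memcpy(dst, src, BUF_SIZE);
		clock_gettime(CLOCK_MONOTONIC, &t1);
		double s = (t1.tv_sec - t0.tv_sec) +
			   (t1.tv_nsec - t0.tv_nsec) / 1e9;
		printf("speed %f mbytes/s\n",
		       BUF_SIZE / (1024.0 * 1024.0) / s);
	}

	clReleaseContext(ctx);
	free(src);
	free(dst);
	return 0;
}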


On Tue, Mar 12, 2019 at 11:21 PM Lauri Ehrenpreis 
wrote:

> IN the case when memory is slow, the rocm-smi outputs this:
> ROCm System Management Interface
> 
>
> 
> GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
> GPU[0] : WARNING: Empty SysFS value: pclk
> GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
> 0    30.0c  N/A     400Mhz   933Mhz   N/A   0%   auto    N/A     0%       0%       N/A
>
> 
>    End of ROCm SMI Log
> 
>
> normal memory speed case gives following:
> ROCm System Management Interface
> 
>
> 
> GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
> GPU[0] : WARNING: Empty SysFS value: pclk
> GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
> 0    35.0c  N/A     400Mhz   1200Mhz  N/A   0%   auto    N/A     0%       0%       N/A
>
> 
>    End of ROCm SMI Log
> 
>
> So there is a difference in MCLK - can this cause such a huge slowdown?
>
> --
> Lauri
>
> On Tue, Mar 12, 2019 at 6:39 PM Kuehling, Felix 
> wrote:
>
>> [adding the list back]
>>
>> I'd suspect a problem related to memory clock. This is an APU where
>> system memory is shared with the CPU, so if the SMU changes memory
>> clocks that would affect CPU memory access performance. If the problem
>> only occurs when OpenCL is running, then the compute power profile could
>> have an effect here.
>>
>> Lauri, can you monitor the clocks during your tests using rocm-smi?
>>
>> Regards,
>>Felix
>>
>> On 2019-03-11 1:15 p.m., Tom St Denis wrote:
>> > Hi Lauri,
>> >
>> > I don't have ROCm installed locally (not on that team at AMD) but I
>> > can rope in some of the KFD folk and see what they say :-).
>> >
>> > (in the mean time I should look into installing the ROCm stack on my
>> > Ubuntu disk for experimentation...).
>> >
>> > Only other thing that comes to mind is some sort of stutter due to
>> > power/clock gating (or gfx off/etc).  But that typically affects the
>> > display/gpu side not the CPU side.
>> >
>> > Felix:  Any known issues with Raven and ROCm interacting over memory
>> > bus performance?
>> >
>> > Tom
>> >
>> > On Mon, Mar 11, 2019 at 12:56 PM Lauri Ehrenpreis > > > wrote:
>> >
>> > Hi!
>> >
>> > The 100x memory slowdown is hard to believe indeed. I attached the
>> > test program with my first e-mail which depends only on
>> > rocm-opencl-dev package. Would you mind compiling it and checking
>> > if it slows down memory for you as well?
>> >
>> > steps:
>> > 1) g++ cl_slow_test.cpp -o cl_slow_test -I
>> > /opt/rocm/opencl/include/ -L /opt/rocm/opencl/lib/x86_64/  -lOpenCL
>> > 2) logout from desktop env and disconnect hdmi/diplayport etc
>> > 3) log in over ssh
>> > 4) run the program ./cl_slow_test 1
>> >
>> > For me it reproduced even without step 2 as well, but less
>> > reliably. Moving the mouse, for example, could make the memory
>> > speed fast again.
>> >
>> > --
>> > Lauri
>> >
>> >
>> >
>> > On Mon, Mar 11, 2019 at 6:33 PM Tom St Denis > > > wrote:
>> >
>> > Hi Lauri,

Re: Slow memory access when using OpenCL without X11

2019-03-12 Thread Lauri Ehrenpreis
IN the case when memory is slow, the rocm-smi outputs this:
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    30.0c  N/A     400Mhz   933Mhz   N/A   0%   auto    N/A     0%       0%       N/A

   End of ROCm SMI Log


normal memory speed case gives following:
ROCm System Management Interface


GPU  Temp   AvgPwr  SCLK     MCLK     PCLK  Fan  Perf    PwrCap  SCLK OD  MCLK OD  GPU%
GPU[0] : WARNING: Empty SysFS value: pclk
GPU[0] : WARNING: Unable to read /sys/class/drm/card0/device/gpu_busy_percent
0    35.0c  N/A     400Mhz   1200Mhz  N/A   0%   auto    N/A     0%       0%       N/A

   End of ROCm SMI Log


So there is a difference in MCLK - can this cause such a huge slowdown?

--
Lauri

On Tue, Mar 12, 2019 at 6:39 PM Kuehling, Felix 
wrote:

> [adding the list back]
>
> I'd suspect a problem related to memory clock. This is an APU where
> system memory is shared with the CPU, so if the SMU changes memory
> clocks that would affect CPU memory access performance. If the problem
> only occurs when OpenCL is running, then the compute power profile could
> have an effect here.
>
> Lauri, can you monitor the clocks during your tests using rocm-smi?
>
> Regards,
>Felix
>
> On 2019-03-11 1:15 p.m., Tom St Denis wrote:
> > Hi Lauri,
> >
> > I don't have ROCm installed locally (not on that team at AMD) but I
> > can rope in some of the KFD folk and see what they say :-).
> >
> > (in the mean time I should look into installing the ROCm stack on my
> > Ubuntu disk for experimentation...).
> >
> > Only other thing that comes to mind is some sort of stutter due to
> > power/clock gating (or gfx off/etc).  But that typically affects the
> > display/gpu side not the CPU side.
> >
> > Felix:  Any known issues with Raven and ROCm interacting over memory
> > bus performance?
> >
> > Tom
> >
> > On Mon, Mar 11, 2019 at 12:56 PM Lauri Ehrenpreis  > > wrote:
> >
> > Hi!
> >
> > The 100x memory slowdown is hard to believe indeed. I attached the
> > test program with my first e-mail which depends only on
> > rocm-opencl-dev package. Would you mind compiling it and checking
> > if it slows down memory for you as well?
> >
> > steps:
> > 1) g++ cl_slow_test.cpp -o cl_slow_test -I
> > /opt/rocm/opencl/include/ -L /opt/rocm/opencl/lib/x86_64/  -lOpenCL
> > 2) logout from desktop env and disconnect hdmi/diplayport etc
> > 3) log in over ssh
> > 4) run the program ./cl_slow_test 1
> >
> > For me it reproduced even without step 2 as well, but less
> > reliably. Moving the mouse, for example, could make the memory
> > speed fast again.
> >
> > --
> > Lauri
> >
> >
> >
> > On Mon, Mar 11, 2019 at 6:33 PM Tom St Denis  > > wrote:
> >
> > Hi Lauri,
> >
> > There's really no connection between the two other than they
> > run in the same package.  I too run a 2400G (as my
> > workstation) and I got the same ~6.6GB/sec transfer rate but
> > without a CL app running ...  The only logical reason is your
> > CL app is bottlenecking the APU's memory bus but you claim
> > "simply opening a context is enough" so something else is
> > going on.
> >
> > Your last reply though says "with it running in the
> > background" so it's entirely possible the CPU isn't busy but
> > the package memory controller (shared between both the CPU and
> > GPU) is busy.  For instance running xonotic in a 1080p window
> > on my 4K display reduced the memory test to 5.8GB/sec and
> > that's hardly a heavy memory bound GPU app.
> >
> > The only other possible connection is the GPU is generating so
> > much heat that it's throttling the package which is also
> > unlikely if you have a proper HSF attached (I use the ones
> > that came in the retail boxes).
> >
> > Cheers,
> > Tom
> >
>

[PATCH 1/1] drm/amdgpu: Wait for newly allocated PTs to be idle

2019-03-12 Thread Kuehling, Felix
When page tables are updated by the CPU, synchronize with the
allocation and initialization of newly allocated page tables.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8603c85..4303436 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -899,17 +899,17 @@ static void amdgpu_vm_bo_param(struct amdgpu_device 
*adev, struct amdgpu_vm *vm,
 }
 
 /**
- * amdgpu_vm_alloc_pts - Allocate page tables.
+ * amdgpu_vm_alloc_pts - Allocate a specific page table
  *
  * @adev: amdgpu_device pointer
  * @vm: VM to allocate page tables for
- * @saddr: Start address which needs to be allocated
- * @size: Size from start address we need.
+ * @cursor: Which page table to allocate
  *
- * Make sure the page directories and page tables are allocated
+ * Make sure a specific page table or directory is allocated.
  *
  * Returns:
- * 0 on success, errno otherwise.
+ * 1 if page table needed to be allocated, 0 if page table was already
+ * allocated, negative errno if an error occurred.
  */
 static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
   struct amdgpu_vm *vm,
@@ -956,7 +956,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (r)
goto error_free_pt;
 
-   return 0;
+   return 1;
 
 error_free_pt:
amdgpu_bo_unref(&pt->shadow);
@@ -1621,10 +1621,12 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
unsigned shift, parent_shift, mask;
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
+   bool need_to_sync;
 
r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
-   if (r)
+   if (r < 0)
return r;
+   need_to_sync = (r && params->vm->use_cpu_for_update);
 
pt = cursor.entry->base.bo;
 
@@ -1672,6 +1674,10 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
 
+   if (need_to_sync)
+   r = amdgpu_bo_sync_wait(params->vm->root.base.bo,
+   AMDGPU_FENCE_OWNER_VM, true);
+
do {
uint64_t upd_end = min(entry_end, frag_end);
unsigned nptes = (upd_end - frag_start) >> shift;
-- 
2.7.4


Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Kuehling, Felix
I'm also still seeing VM faults in the eviction test even with my fix, 
and even with SDMA page table updates. There is still something else 
going wrong. :/

Thanks,
   Felix

On 2019-03-12 5:13 p.m., Yang, Philip wrote:
> A VM fault happens in about 1 of 10 runs of KFDCWSRTest.BasicTest for
> me. I am using SDMA for page table updates; I haven't tried CPU page
> table updates.
>
> Philip
>
> On 2019-03-12 11:12 a.m., Russell, Kent wrote:
>> Peculiar, I hit it immediately when I ran it. Can you try using
>> --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.
>>
>>Kent
>>
>>> -Original Message-
>>> From: Christian König 
>>> Sent: Tuesday, March 12, 2019 11:09 AM
>>> To: Russell, Kent ; Koenig, Christian
>>> ; Kuehling, Felix ;
>>> amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> Yeah, same problem here.
>>>
>>> I removed the libhsakmt package and installed it manually and now it
>>> seems to work.
>>>
>>> Doing some testing now, but at least off hand I can't seem to
>>> reproduce the VM fault on a Vega10.
>>>
>>> Christian.
>>>
 On 12.03.19 at 16:01, Russell, Kent wrote:
 Oh right, I remember that issue. I had that happen to me once, where my
>>> installed libhsakmt didn't match up with the latest source code, so I ended 
>>> up
>>> having to remove the libhsakmt package and pointing it to the folders
>>> instead.
 Kent

> -Original Message-
> From: Koenig, Christian
> Sent: Tuesday, March 12, 2019 10:49 AM
> To: Russell, Kent ; Kuehling, Felix
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>
> Yeah, the problem is I do have the libhsakmt installed.
>
> Going to give it a try to specify the directory directly.
>
> Christian.
>
> On 12.03.19 at 15:47, Russell, Kent wrote:
>> The README.txt file inside the tests/kfdtest folder has instructions
>> on how
> to do it if you don't have the libhsakmt package installed on your system:
>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>> LIBHSAKMT_PATH/lib respectively.
>>
>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
> containing include, src, tests, etc), then that should cover it.
>>  Kent
>>
>>
>>> -Original Message-
>>> From: Christian König 
>>> Sent: Tuesday, March 12, 2019 9:13 AM
>>> To: Russell, Kent ; Kuehling, Felix
>>> ; Koenig, Christian
>>> ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>> demand
>>> Hi guys,
>>>
>>> so found a few minutes today to compile kfdtest.
>>>
>>> Problem is that during the compile I get a lot of this:
 CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
 »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
 /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
 warning: undefined reference to »hsaKmtCreateQueue«
>>> Any idea?
>>>
>>> Christian.
>>>
 On 11.03.19 at 17:55, Christian König wrote:
 Hi guys,

 well it's most likely some missing handling in the KFD, so I'm
 rather reluctant to revert the change immediately.

 Problem is that I don't have time right now to look into it
 immediately. So Kent can you continue to take a look?

 Sounds like it's crashing immediately, so it should be something
>>> obvious.
 Christian.

 On 11.03.19 at 10:49, Russell, Kent wrote:
>  From what I've been able to dig through, the VM Fault seems to
> occur right after a doorbell mmap, but that's as far as I got. I
> can try to revert it in today's merge and see how things go.
>
>   Kent
>
>> -Original Message-
>> From: Kuehling, Felix
>> Sent: Friday, March 08, 2019 11:16 PM
>> To: Koenig, Christian ; Russell, Kent
>> ; amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
>> My concerns were related to eviction fence handling. It would
>> manifest by unnecessary eviction callbacks into KFD that aren't
>> caused by real evictions. I addressed that with a previous patch
>> series that removed the need to remove eviction fences and add
>> them back around page table updates in
>>> amdgpu_amdkfd_gpuvm.c.
>> I don't know what's going on here. I can probably take a look on
>> Monday. I haven't considered what changed with respect to PD
>> updates.
>>
>> Kent, can we temporarily revert the offending change in
>> amd-kfd-staging just to 

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Yang, Philip
A VM fault happens in about 1 of 10 runs of KFDCWSRTest.BasicTest for
me. I am using SDMA for page table updates; I haven't tried CPU page
table updates.

Philip

On 2019-03-12 11:12 a.m., Russell, Kent wrote:
> Peculiar, I hit it immediately when I ran it. Can you try using
> --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.
> 
>   Kent
> 
>> -Original Message-
>> From: Christian König 
>> Sent: Tuesday, March 12, 2019 11:09 AM
>> To: Russell, Kent ; Koenig, Christian
>> ; Kuehling, Felix ;
>> amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Yeah, same problem here.
>>
>> I removed the libhsakmt package and installed it manually and now it
>> seems to work.
>>
>> Doing some testing now, but at least off hand I can't seem to
>> reproduce the VM fault on a Vega10.
>>
>> Christian.
>>
>> On 12.03.19 at 16:01, Russell, Kent wrote:
>>> Oh right, I remember that issue. I had that happen to me once, where my
>> installed libhsakmt didn't match up with the latest source code, so I ended 
>> up
>> having to remove the libhsakmt package and pointing it to the folders
>> instead.
>>>
>>>Kent
>>>
 -Original Message-
 From: Koenig, Christian
 Sent: Tuesday, March 12, 2019 10:49 AM
 To: Russell, Kent ; Kuehling, Felix
 ; amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

 Yeah, the problem is I do have the libhsakmt installed.

 Going to give it a try to specify the directory directly.

 Christian.

 On 12.03.19 at 15:47, Russell, Kent wrote:
> The README.txt file inside the tests/kfdtest folder has instructions
> on how
 to do it if you don't have the libhsakmt package installed on your system:
> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
> headers and libraries are searched under LIBHSAKMT_PATH/include and
> LIBHSAKMT_PATH/lib respectively.
>
> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
 containing include, src, tests, etc), then that should cover it.
> Kent
>
>
>> -Original Message-
>> From: Christian König 
>> Sent: Tuesday, March 12, 2019 9:13 AM
>> To: Russell, Kent ; Kuehling, Felix
>> ; Koenig, Christian
>> ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>
>> Hi guys,
>>
>> so found a few minutes today to compile kfdtest.
>>
>> Problem is that during the compile I get a lot of this:
>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>> warning: undefined reference to »hsaKmtCreateQueue«
>> Any idea?
>>
>> Christian.
>>
>> On 11.03.19 at 17:55, Christian König wrote:
>>> Hi guys,
>>>
>>> well it's most likely some missing handling in the KFD, so I'm
>>> rather reluctant to revert the change immediately.
>>>
>>> Problem is that I don't have time right now to look into it
>>> immediately. So Kent can you continue to take a look?
>>>
>>> Sounds like it's crashing immediately, so it should be something
>> obvious.
>>>
>>> Christian.
>>>
 On 11.03.19 at 10:49, Russell, Kent wrote:
 From what I've been able to dig through, the VM Fault seems to
 occur right after a doorbell mmap, but that's as far as I got. I
 can try to revert it in today's merge and see how things go.

  Kent

> -Original Message-
> From: Kuehling, Felix
> Sent: Friday, March 08, 2019 11:16 PM
> To: Koenig, Christian ; Russell, Kent
> ; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
 demand
> My concerns were related to eviction fence handling. It would
> manifest by unnecessary eviction callbacks into KFD that aren't
> caused by real evictions. I addressed that with a previous patch
> series that removed the need to remove eviction fences and add
> them back around page table updates in
>> amdgpu_amdkfd_gpuvm.c.
>
> I don't know what's going on here. I can probably take a look on
> Monday. I haven't considered what changed with respect to PD
> updates.
>
> Kent, can we temporarily revert the offending change in
> amd-kfd-staging just to unblock the merge?
>
> Christian, I think KFD is currently broken on amd-staging-drm-next.
> If we're
> serious about supporting KFD upstream, you may also want to
> consider reverting your change there for now. Also consider
> building the Thunk and kfdtest so you can do quick 

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Kuehling, Felix
The root cause is that we don't wait after calling amdgpu_vm_clear_bo in 
amdgpu_vm_alloc_pts.

Waiting for the page table BOs to be idle for CPU page table updates is 
done in amdgpu_vm_bo_update_mapping. That is now *before* the page 
tables are actually allocated and cleared in amdgpu_vm_update_ptes.

We'll need to move the waiting for page tables to be idle into 
amdgpu_vm_alloc_pts or amdgpu_vm_update_ptes.
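
Felix's patch earlier in this digest does exactly that in
amdgpu_vm_update_ptes, using the new allocation return code to decide
when to wait:

	r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
	if (r < 0)
		return r;
	need_to_sync = (r && params->vm->use_cpu_for_update);
	...
	if (need_to_sync)
		r = amdgpu_bo_sync_wait(params->vm->root.base.bo,
					AMDGPU_FENCE_OWNER_VM, true);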

Regards,
   Felix

On 2019-03-12 3:02 p.m., Felix Kuehling wrote:
> I find that it's related to CPU page table updates. If I force page 
> table updates with SDMA, I don't get the VM fault.
>
> Regards,
>   Felix
>
> On 2019-03-11 12:55 p.m., Christian König wrote:
>> Hi guys,
>>
>> well it's most likely some missing handling in the KFD, so I'm rather 
>> reluctant to revert the change immediately.
>>
>> Problem is that I don't have time right now to look into it 
>> immediately. So Kent can you continue to take a look?
>>
>> Sounds like it's crashing immediately, so it should be something obvious.
>>
>> Christian.
>>
>> On 11.03.19 at 10:49, Russell, Kent wrote:
>>>  From what I've been able to dig through, the VM Fault seems to 
>>> occur right after a doorbell mmap, but that's as far as I got. I can 
>>> try to revert it in today's merge and see how things go.
>>>
>>>   Kent
>>>
 -Original Message-
 From: Kuehling, Felix
 Sent: Friday, March 08, 2019 11:16 PM
 To: Koenig, Christian ; Russell, Kent
 ; amd-gfx@lists.freedesktop.org
 Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

 My concerns were related to eviction fence handling. It would
 manifest by unnecessary eviction callbacks into KFD that aren't
 caused by real evictions. I
 addressed that with a previous patch series that removed the need to
 remove eviction fences and add them back around page table updates in
 amdgpu_amdkfd_gpuvm.c.

 I don't know what's going on here. I can probably take a look on 
 Monday. I
 haven't considered what changed with respect to PD updates.

 Kent, can we temporarily revert the offending change in 
 amd-kfd-staging
 just to unblock the merge?

 Christian, I think KFD is currently broken on amd-staging-drm-next. 
 If we're
 serious about supporting KFD upstream, you may also want to consider
 reverting your change there for now. Also consider building the 
 Thunk and
 kfdtest so you can do quick smoke tests locally whenever you make
 amdgpu_vm changes that can affect KFD.
 https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface

 Regards,
    Felix

 -Original Message-
 From: amd-gfx  On Behalf Of
 Christian König
 Sent: Friday, March 08, 2019 9:14 AM
 To: Russell, Kent ; 
 amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

 My best guess is that we forget somewhere to update the PDs. What
 hardware is that on?

 Felix already mentioned that this could be problematic for the KFD.

 Maybe he has an idea,
 Christian.

 On 08.03.19 at 15:04, Russell, Kent wrote:
> Hi Christian,
>
> This patch ended up causing a VM Fault in KFDTest. Reverting just 
> this
 patch addressed the issue:
> [   82.703503] amdgpu :0c:00.0: GPU fault detected: 146 
> 0x480c for
 process  pid 0 thread  pid 0
> [   82.703512] amdgpu :0c:00.0:
 VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000
> [   82.703516] amdgpu :0c:00.0:
 VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> [   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid 
> 32769) at
 page 4096, read from 'TC0' (0x54433000) (72)
> [   82.703585] Evicting PASID 32769 queues
>
> I am looking into it, but if you have any insight that would be 
> great in
 helping to resolve it quickly.
>    Kent
>> -Original Message-
>> From: amd-gfx  On Behalf Of
>> Christian König
>> Sent: Tuesday, February 26, 2019 7:47 AM
>> To: amd-gfx@lists.freedesktop.org
>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Let's start to allocate VM PDs/PTs on demand instead of
>> pre-allocating them during mapping.
>>
>> Signed-off-by: Christian König 
>> Reviewed-by: Felix Kuehling 
>> ---
>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 136 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 -
>>    5 files changed, 39 insertions(+), 129 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> 

Re: [PATCH 1/3] drm/amdkfd: support concurrent userptr update for HMM v2

2019-03-12 Thread Kuehling, Felix
On 2019-03-06 9:42 p.m., Yang, Philip wrote:
> Userptr restore may race with a concurrent userptr invalidation after
> hmm_vma_fault adds the range to the hmm->ranges list. We need to call
> hmm_vma_range_done to remove the range from the hmm->ranges list first,
> and then reschedule the restore worker. Otherwise hmm_vma_fault will
> add the same range to the list again, which creates a loop in the list
> because range->next then points to the range itself.
>
> Add function untrack_invalid_user_pages to reduce code duplication.
>
> Change-Id: I31407739dc10554f8e418c7a0e0415d3d95552f1
> Signed-off-by: Philip Yang 
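
A userspace toy model of the list corruption described in the commit
message (a sketch that mimics the kernel's list_add semantics;
simplified, not driver code):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add(struct list_head *entry, struct list_head *head)
{
	entry->next = head->next;
	entry->prev = head;
	head->next->prev = entry;
	head->next = entry;
}

int main(void)
{
	struct list_head ranges = { &ranges, &ranges }; /* hmm->ranges */
	struct list_head range;                         /* one hmm_range */

	list_add(&range, &ranges);  /* first hmm_vma_fault: fine */
	list_add(&range, &ranges);  /* re-add without hmm_vma_range_done */
	printf("list loops: %s\n",
	       range.next == &range ? "yes" : "no");   /* prints "yes" */
	return 0;
}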

This patch is Reviewed-by: Felix Kuehling 


> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 25 ++-
>   1 file changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d2e315f42dad..60d53b0b497a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1935,6 +1935,23 @@ static int update_invalid_user_pages(struct 
> amdkfd_process_info *process_info,
>   return 0;
>   }
>   
> +/* Remove invalid userptr BOs from hmm track list
> + *
> + * Stop HMM track the userptr update
> + */
> +static void untrack_invalid_user_pages(struct amdkfd_process_info 
> *process_info)
> +{
> + struct kgd_mem *mem, *tmp_mem;
> + struct amdgpu_bo *bo;
> +
> + list_for_each_entry_safe(mem, tmp_mem,
> +  &process_info->userptr_inval_list,
> +  validate_list.head) {
> + bo = mem->bo;
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> + }
> +}
> +
>   /* Validate invalid userptr BOs
>*
>* Validates BOs on the userptr_inval_list, and moves them back to the
> @@ -2052,12 +2069,6 @@ static int validate_invalid_user_pages(struct 
> amdkfd_process_info *process_info)
>   out_free:
>   kfree(pd_bo_list_entries);
>   out_no_mem:
> - list_for_each_entry_safe(mem, tmp_mem,
> -  &process_info->userptr_inval_list,
> -  validate_list.head) {
> - bo = mem->bo;
> - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> - }
>   
>   return ret;
>   }
> @@ -2122,7 +2133,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct 
> work_struct *work)
>* hanging. No point trying again.
>*/
>   }
> +
>   unlock_out:
> + untrack_invalid_user_pages(process_info);
>   mutex_unlock(&process_info->lock);
>   mmput(mm);
>   put_task_struct(usertask);

Re: [PATCH 2/3] drm/amdgpu: support userptr cross VMAs case with HMM v2

2019-03-12 Thread Kuehling, Felix
See one comment inline. There are still some potential problems that 
you're not catching.

On 2019-03-06 9:42 p.m., Yang, Philip wrote:
> userptr may cross two VMAs if a forked child process (which does not
> call exec after fork) mallocs a buffer, frees it, and then mallocs a
> larger buffer: the kernel creates a new VMA adjacent to the old VMA
> that was cloned from the parent process, so some pages of the userptr
> are in the first VMA and the rest are in the second VMA.
>
> HMM expects a range to span only one VMA, so loop over all VMAs in the
> address range and create multiple ranges to handle this case. See
> is_mergeable_anon_vma in mm/mmap.c for details.
>
> Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
> Signed-off-by: Philip Yang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 123 +---
>   1 file changed, 88 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7cc0ba24369d..802bec7ef917 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt {
>   struct task_struct  *usertask;
>   uint32_tuserflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> - struct hmm_rangerange;
> + struct hmm_range*ranges;
> + int nr_ranges;
>   #endif
>   };
>   
> @@ -723,62 +724,105 @@ struct amdgpu_ttm_tt {
>* once afterwards to stop HMM tracking
>*/
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> +
> +/* Support Userptr pages cross max 16 vmas */
> +#define MAX_NR_VMAS  (16)
> +
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   struct mm_struct *mm = gtt->usertask->mm;
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct hmm_range *range = &gtt->range;
> - int r = 0, i;
> + unsigned long start = gtt->userptr;
> + unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *ranges;
> + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> + uint64_t *pfns, f;
> + int r = 0, i, nr_pages;
>   
>   if (!mm) /* Happens during process shutdown */
>   return -ESRCH;
>   
> - amdgpu_hmm_init_range(range);
> -
>   down_read(&mm->mmap_sem);
>   
> - range->vma = find_vma(mm, gtt->userptr);
> - if (!range_in_vma(range->vma, gtt->userptr, end))
> - r = -EFAULT;
> - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> - range->vma->vm_file)
> + /* user pages may cross multiple VMAs */
> + gtt->nr_ranges = 0;
> + do {
> + if (gtt->nr_ranges >= MAX_NR_VMAS) {
> + DRM_ERROR("Too many VMAs in userptr range\n");
> + r = -EFAULT;
> + goto out;
> + }
> +
> + vma = find_vma(mm, vma ? vma->vm_end : start);

You need a check here that vma->vm_start <= the requested start address. 
Otherwise you can end up with gaps in your userptr mapping that don't 
have valid pages.

Regards,
   Felix
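
A minimal sketch of the suggested check, reusing the names from the quoted
patch (vm_start is a new local introduced only for this illustration):

	unsigned long vm_start;

	/* The next VMA must start where the previous one ended (or at the
	 * userptr start), otherwise there is a gap with no backing pages. */
	vm_start = vma ? vma->vm_end : start;
	vma = find_vma(mm, vm_start);
	if (unlikely(!vma || vm_start < vma->vm_start)) {
		r = -EFAULT;
		goto out;
	}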


> + if (unlikely(!vma)) {
> + r = -EFAULT;
> + goto out;
> + }
> + vmas[gtt->nr_ranges++] = vma;
> + } while (end > vma->vm_end);
> +
> + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> + start, gtt->nr_ranges, ttm->num_pages);
> +
> + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + vmas[0]->vm_file)) {
>   r = -EPERM;
> - if (r)
>   goto out;
> + }
>   
> - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -  GFP_KERNEL);
> - if (range->pfns == NULL) {
> + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> + if (unlikely(!ranges)) {
>   r = -ENOMEM;
>   goto out;
>   }
> - range->start = gtt->userptr;
> - range->end = end;
>   
> - range->pfns[0] = range->flags[HMM_PFN_VALID];
> - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> - 0 : range->flags[HMM_PFN_WRITE];
> - for (i = 1; i < ttm->num_pages; i++)
> - range->pfns[i] = range->pfns[0];
> + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + r = -ENOMEM;
> + goto out_free_ranges;
> + }
> +
> + for (i = 0; i < gtt->nr_ranges; i++)
> + amdgpu_hmm_init_range(&ranges[i]);
> +
> + f = ranges[0].flags[HMM_PFN_VALID];
> + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : ranges[0].flags[HMM_PFN_WRITE];
> + memset64(pfns, f, ttm->num_pages);
> +
> + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> + ranges[i].vma = vmas[i];
> +   

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Kuehling, Felix
I find that it's related to CPU page table updates. If I force page 
table updates with SDMA, I don't get the VM fault.

Regards,
   Felix

On 2019-03-11 12:55 p.m., Christian König wrote:
> Hi guys,
>
> well it's most likely some missing handling in the KFD, so I'm rather 
> reluctant to revert the change immediately.
>
> Problem is that I don't have time right now to look into it 
> immediately. So Kent can you continue to take a look?
>
> Sounds like it's crashing immediately, so it should be something obvious.
>
> Christian.
>
> On 11.03.19 at 10:49, Russell, Kent wrote:
>>  From what I've been able to dig through, the VM Fault seems to occur 
>> right after a doorbell mmap, but that's as far as I got. I can try to 
>> revert it in today's merge and see how things go.
>>
>>   Kent
>>
>>> -Original Message-
>>> From: Kuehling, Felix
>>> Sent: Friday, March 08, 2019 11:16 PM
>>> To: Koenig, Christian ; Russell, Kent
>>> ; amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My concerns were related to eviction fence handing. It would 
>>> manifest by
>>> unnecessary eviction callbacks into KFD that aren't cause by real 
>>> evictions. I
>>> addressed that with a previous patch series that removed the need to
>>> remove eviction fences and add them back around page table updates in
>>> amdgpu_amdkfd_gpuvm.c.
>>>
>>> I don't know what's going on here. I can probably take a look on 
>>> Monday. I
>>> haven't considered what changed with respect to PD updates.
>>>
>>> Kent, can we temporarily revert the offending change in amd-kfd-staging
>>> just to unblock the merge?
>>>
>>> Christian, I think KFD is currently broken on amd-staging-drm-next. 
>>> If we're
>>> serious about supporting KFD upstream, you may also want to consider
>>> reverting your change there for now. Also consider building the 
>>> Thunk and
>>> kfdtest so you can do quick smoke tests locally whenever you make
>>> amdgpu_vm changes that can affect KFD.
>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>
>>> Regards,
>>>    Felix
>>>
>>> -Original Message-
>>> From: amd-gfx  On Behalf Of
>>> Christian König
>>> Sent: Friday, March 08, 2019 9:14 AM
>>> To: Russell, Kent ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My best guess is that we forget somewhere to update the PDs. What
>>> hardware is that on?
>>>
>>> Felix already mentioned that this could be problematic for the KFD.
>>>
>>> Maybe he has an idea,
>>> Christian.
>>>
>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
 Hi Christian,

 This patch ended up causing a VM Fault in KFDTest. Reverting just this
>>> patch addressed the issue:
 [   82.703503] amdgpu :0c:00.0: GPU fault detected: 146 
 0x480c for
>>> process  pid 0 thread  pid 0
 [   82.703512] amdgpu :0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000
 [   82.703516] amdgpu :0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
 [   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid 
 32769) at
>>> page 4096, read from 'TC0' (0x54433000) (72)
 [   82.703585] Evicting PASID 32769 queues

 I am looking into it, but if you have any insight that would be 
 great in
>>> helping to resolve it quickly.
    Kent
> -Original Message-
> From: amd-gfx  On Behalf Of
> Christian König
> Sent: Tuesday, February 26, 2019 7:47 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>
> Let's start to allocate VM PDs/PTs on demand instead of
> pre-allocating them during mapping.
>
> Signed-off-by: Christian König 
> Reviewed-by: Felix Kuehling 
> ---
>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 136 +--
>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |   3 -
>    5 files changed, 39 insertions(+), 129 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 31e3953dcb6e..088e9b6b765b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
>    if (p_bo_va_entry)
>    *p_bo_va_entry = bo_va_entry;
>
> -    /* Allocate new page tables if needed and validate
> - * them.
> - */
> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> -    if (ret) {
> -    pr_err("Failed to allocate pts, err=%d\n", ret);
> -    

Re: [PATCH] drm/v3d: Fix calling drm_sched_resubmit_jobs for same sched.

2019-03-12 Thread Grodzovsky, Andrey
They are not the same, but the guilty job belongs to only one {entity, 
scheduler} pair and so we mark as guilty only for that particular entity in the 
context of that scheduler only once.

Andrey
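
The matching Andrey describes happens by fence context, roughly as in
drm_sched_increase_karma() in sched_main.c; this is a heavily trimmed sketch
(locking, the run-queue walk and the kernel-priority exemption are omitted):

	atomic_inc(&bad->karma);
	list_for_each_entry(entity, &rq->entities, list) {
		/* Only the entity that submitted the bad job matches. */
		if (bad->s_fence->scheduled.context == entity->fence_context) {
			if (atomic_read(&bad->karma) > bad->sched->hang_limit &&
			    entity->guilty)
				atomic_set(entity->guilty, 1);
			break;
		}
	}

Calling this once per queue would therefore only bump the same job's karma
repeatedly without marking any additional entity.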


From: Eric Anholt 
Sent: 12 March 2019 13:33:16
To: Grodzovsky, Andrey; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; to...@tomeuvizoso.net
Cc: Grodzovsky, Andrey
Subject: Re: [PATCH] drm/v3d: Fix calling drm_sched_resubmit_jobs for same 
sched.

Andrey Grodzovsky  writes:

> Also stop calling drm_sched_increase_karma multiple times.

Each v3d->queue[q].sched was initialized with a separate
drm_sched_init().  I wouldn't have thought they were all the "same
sched".

[PATCH 5.0 13/25] drm: disable uncached DMA optimization for ARM and arm64

2019-03-12 Thread Greg Kroah-Hartman
5.0-stable review patch.  If anyone has any objections, please let me know.

--

[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]

The DRM driver stack is designed to work with cache coherent devices
only, but permits an optimization to be enabled in some cases, where
for some buffers, both the CPU and the GPU use uncached mappings,
removing the need for DMA snooping and allocation in the CPU caches.

The use of uncached GPU mappings relies on the correct implementation
of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
will use cached mappings nonetheless. On x86 platforms, this does not
seem to matter, as uncached CPU mappings will snoop the caches in any
case. However, on ARM and arm64, enabling this optimization on a
platform where NoSnoop is ignored results in loss of coherency, which
breaks correct operation of the device. Since we have no way of
detecting whether NoSnoop works or not, just disable this
optimization entirely for ARM and arm64.

Cc: Christian Koenig 
Cc: Alex Deucher 
Cc: David Zhou 
Cc: Huang Rui 
Cc: Junwei Zhang 
Cc: Michel Daenzer 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Sean Paul 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Will Deacon 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: amd-gfx list 
Cc: dri-devel 
Reported-by: Carsten Haitzler 
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Christian König 
Reviewed-by: Alex Deucher 
Link: https://patchwork.kernel.org/patch/10778815/
Signed-off-by: Christian König 
Signed-off-by: Sasha Levin 
---
 include/drm/drm_cache.h |   18 ++
 1 file changed, 18 insertions(+)

--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -47,6 +47,24 @@ static inline bool drm_arch_can_wc_memory(void)
return false;
 #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
return false;
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+   /*
+* The DRM driver stack is designed to work with cache coherent devices
+* only, but permits an optimization to be enabled in some cases, where
+* for some buffers, both the CPU and the GPU use uncached mappings,
+* removing the need for DMA snooping and allocation in the CPU caches.
+*
+* The use of uncached GPU mappings relies on the correct implementation
+* of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+* will use cached mappings nonetheless. On x86 platforms, this does not
+* seem to matter, as uncached CPU mappings will snoop the caches in any
+* case. However, on ARM and arm64, enabling this optimization on a
+* platform where NoSnoop is ignored results in loss of coherency, which
+* breaks correct operation of the device. Since we have no way of
+* detecting whether NoSnoop works or not, just disable this
+* optimization entirely for ARM and arm64.
+*/
+   return false;
 #else
return true;
 #endif
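
As a usage illustration (not part of the patch): a driver would gate its
write-combined CPU mappings on this helper along the following lines, where
the wrapper name is made up for the example:

#include <drm/drm_cache.h>

/* Use a write-combined CPU mapping only where the DRM core considers WC
 * memory coherent with the device; otherwise fall back to cached,
 * snooped mappings. */
static bool example_bo_use_wc(bool requested_wc)
{
	return requested_wc && drm_arch_can_wc_memory();
}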



Re: [PATCH] drm/v3d: Fix calling drm_sched_resubmit_jobs for same sched.

2019-03-12 Thread Eric Anholt
Andrey Grodzovsky  writes:

> Also stop calling drm_sched_increase_karma multiple times.

Each v3d->queue[q].sched was initialized with a separate
drm_sched_init().  I wouldn't have thought they were all the "same
sched".
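
For context, the per-queue initialization Eric refers to looks roughly like
this (a sketch: the real v3d_sched_init() initializes each queue explicitly
with its own ops structure and name, so the arrays and limit values here are
illustrative):

	for (q = 0; q < V3D_MAX_QUEUES; q++) {
		ret = drm_sched_init(&v3d->queue[q].sched,
				     &v3d_queue_sched_ops[q],
				     hw_jobs_limit, job_hang_limit,
				     msecs_to_jiffies(hang_limit_ms),
				     v3d_queue_names[q]);
		if (ret)
			return ret;
	}

Each call produces an independent drm_gpu_scheduler, which is why stopping,
karma handling and resubmission have to be handled per queue.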



[PATCH 4.14 127/135] drm: disable uncached DMA optimization for ARM and arm64

2019-03-12 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]

The DRM driver stack is designed to work with cache coherent devices
only, but permits an optimization to be enabled in some cases, where
for some buffers, both the CPU and the GPU use uncached mappings,
removing the need for DMA snooping and allocation in the CPU caches.

The use of uncached GPU mappings relies on the correct implementation
of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
will use cached mappings nonetheless. On x86 platforms, this does not
seem to matter, as uncached CPU mappings will snoop the caches in any
case. However, on ARM and arm64, enabling this optimization on a
platform where NoSnoop is ignored results in loss of coherency, which
breaks correct operation of the device. Since we have no way of
detecting whether NoSnoop works or not, just disable this
optimization entirely for ARM and arm64.

Cc: Christian Koenig 
Cc: Alex Deucher 
Cc: David Zhou 
Cc: Huang Rui 
Cc: Junwei Zhang 
Cc: Michel Daenzer 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Sean Paul 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Will Deacon 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: amd-gfx list 
Cc: dri-devel 
Reported-by: Carsten Haitzler 
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Christian König 
Reviewed-by: Alex Deucher 
Link: https://patchwork.kernel.org/patch/10778815/
Signed-off-by: Christian König 
Signed-off-by: Sasha Levin 
---
 include/drm/drm_cache.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index beab0f0d0cfb..250e2d13c61b 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -45,6 +45,24 @@ static inline bool drm_arch_can_wc_memory(void)
return false;
 #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
return false;
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+   /*
+* The DRM driver stack is designed to work with cache coherent devices
+* only, but permits an optimization to be enabled in some cases, where
+* for some buffers, both the CPU and the GPU use uncached mappings,
+* removing the need for DMA snooping and allocation in the CPU caches.
+*
+* The use of uncached GPU mappings relies on the correct implementation
+* of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+* will use cached mappings nonetheless. On x86 platforms, this does not
+* seem to matter, as uncached CPU mappings will snoop the caches in any
+* case. However, on ARM and arm64, enabling this optimization on a
+* platform where NoSnoop is ignored results in loss of coherency, which
+* breaks correct operation of the device. Since we have no way of
+* detecting whether NoSnoop works or not, just disable this
+* optimization entirely for ARM and arm64.
+*/
+   return false;
 #else
return true;
 #endif
-- 
2.19.1




[PATCH 4.19 134/149] drm: disable uncached DMA optimization for ARM and arm64

2019-03-12 Thread Greg Kroah-Hartman
4.19-stable review patch.  If anyone has any objections, please let me know.

--

[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]

The DRM driver stack is designed to work with cache coherent devices
only, but permits an optimization to be enabled in some cases, where
for some buffers, both the CPU and the GPU use uncached mappings,
removing the need for DMA snooping and allocation in the CPU caches.

The use of uncached GPU mappings relies on the correct implementation
of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
will use cached mappings nonetheless. On x86 platforms, this does not
seem to matter, as uncached CPU mappings will snoop the caches in any
case. However, on ARM and arm64, enabling this optimization on a
platform where NoSnoop is ignored results in loss of coherency, which
breaks correct operation of the device. Since we have no way of
detecting whether NoSnoop works or not, just disable this
optimization entirely for ARM and arm64.

Cc: Christian Koenig 
Cc: Alex Deucher 
Cc: David Zhou 
Cc: Huang Rui 
Cc: Junwei Zhang 
Cc: Michel Daenzer 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Sean Paul 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Will Deacon 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: amd-gfx list 
Cc: dri-devel 
Reported-by: Carsten Haitzler 
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Christian König 
Reviewed-by: Alex Deucher 
Link: https://patchwork.kernel.org/patch/10778815/
Signed-off-by: Christian König 
Signed-off-by: Sasha Levin 
---
 include/drm/drm_cache.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index bfe1639df02d..97fc498dc767 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -47,6 +47,24 @@ static inline bool drm_arch_can_wc_memory(void)
return false;
 #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
return false;
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+   /*
+* The DRM driver stack is designed to work with cache coherent devices
+* only, but permits an optimization to be enabled in some cases, where
+* for some buffers, both the CPU and the GPU use uncached mappings,
+* removing the need for DMA snooping and allocation in the CPU caches.
+*
+* The use of uncached GPU mappings relies on the correct implementation
+* of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+* will use cached mappings nonetheless. On x86 platforms, this does not
+* seem to matter, as uncached CPU mappings will snoop the caches in any
+* case. However, on ARM and arm64, enabling this optimization on a
+* platform where NoSnoop is ignored results in loss of coherency, which
+* breaks correct operation of the device. Since we have no way of
+* detecting whether NoSnoop works or not, just disable this
+* optimization entirely for ARM and arm64.
+*/
+   return false;
 #else
return true;
 #endif
-- 
2.19.1




[PATCH 4.9 91/96] drm: disable uncached DMA optimization for ARM and arm64

2019-03-12 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]

The DRM driver stack is designed to work with cache coherent devices
only, but permits an optimization to be enabled in some cases, where
for some buffers, both the CPU and the GPU use uncached mappings,
removing the need for DMA snooping and allocation in the CPU caches.

The use of uncached GPU mappings relies on the correct implementation
of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
will use cached mappings nonetheless. On x86 platforms, this does not
seem to matter, as uncached CPU mappings will snoop the caches in any
case. However, on ARM and arm64, enabling this optimization on a
platform where NoSnoop is ignored results in loss of coherency, which
breaks correct operation of the device. Since we have no way of
detecting whether NoSnoop works or not, just disable this
optimization entirely for ARM and arm64.

Cc: Christian Koenig 
Cc: Alex Deucher 
Cc: David Zhou 
Cc: Huang Rui 
Cc: Junwei Zhang 
Cc: Michel Daenzer 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Sean Paul 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Will Deacon 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: amd-gfx list 
Cc: dri-devel 
Reported-by: Carsten Haitzler 
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Christian König 
Reviewed-by: Alex Deucher 
Link: https://patchwork.kernel.org/patch/10778815/
Signed-off-by: Christian König 
Signed-off-by: Sasha Levin 
---
 include/drm/drm_cache.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index cebecff536a3..c5fb6f871930 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -41,6 +41,24 @@ static inline bool drm_arch_can_wc_memory(void)
return false;
 #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
return false;
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+   /*
+* The DRM driver stack is designed to work with cache coherent devices
+* only, but permits an optimization to be enabled in some cases, where
+* for some buffers, both the CPU and the GPU use uncached mappings,
+* removing the need for DMA snooping and allocation in the CPU caches.
+*
+* The use of uncached GPU mappings relies on the correct implementation
+* of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+* will use cached mappings nonetheless. On x86 platforms, this does not
+* seem to matter, as uncached CPU mappings will snoop the caches in any
+* case. However, on ARM and arm64, enabling this optimization on a
+* platform where NoSnoop is ignored results in loss of coherency, which
+* breaks correct operation of the device. Since we have no way of
+* detecting whether NoSnoop works or not, just disable this
+* optimization entirely for ARM and arm64.
+*/
+   return false;
 #else
return true;
 #endif
-- 
2.19.1




[PATCH 4.20 160/171] drm: disable uncached DMA optimization for ARM and arm64

2019-03-12 Thread Greg Kroah-Hartman
4.20-stable review patch.  If anyone has any objections, please let me know.

--

[ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ]

The DRM driver stack is designed to work with cache coherent devices
only, but permits an optimization to be enabled in some cases, where
for some buffers, both the CPU and the GPU use uncached mappings,
removing the need for DMA snooping and allocation in the CPU caches.

The use of uncached GPU mappings relies on the correct implementation
of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
will use cached mappings nonetheless. On x86 platforms, this does not
seem to matter, as uncached CPU mappings will snoop the caches in any
case. However, on ARM and arm64, enabling this optimization on a
platform where NoSnoop is ignored results in loss of coherency, which
breaks correct operation of the device. Since we have no way of
detecting whether NoSnoop works or not, just disable this
optimization entirely for ARM and arm64.

Cc: Christian Koenig 
Cc: Alex Deucher 
Cc: David Zhou 
Cc: Huang Rui 
Cc: Junwei Zhang 
Cc: Michel Daenzer 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Sean Paul 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Will Deacon 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: amd-gfx list 
Cc: dri-devel 
Reported-by: Carsten Haitzler 
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Christian König 
Reviewed-by: Alex Deucher 
Link: https://patchwork.kernel.org/patch/10778815/
Signed-off-by: Christian König 
Signed-off-by: Sasha Levin 
---
 include/drm/drm_cache.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index bfe1639df02d..97fc498dc767 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -47,6 +47,24 @@ static inline bool drm_arch_can_wc_memory(void)
return false;
 #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
return false;
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+   /*
+* The DRM driver stack is designed to work with cache coherent devices
+* only, but permits an optimization to be enabled in some cases, where
+* for some buffers, both the CPU and the GPU use uncached mappings,
+* removing the need for DMA snooping and allocation in the CPU caches.
+*
+* The use of uncached GPU mappings relies on the correct implementation
+* of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU
+* will use cached mappings nonetheless. On x86 platforms, this does not
+* seem to matter, as uncached CPU mappings will snoop the caches in any
+* case. However, on ARM and arm64, enabling this optimization on a
+* platform where NoSnoop is ignored results in loss of coherency, which
+* breaks correct operation of the device. Since we have no way of
+* detecting whether NoSnoop works or not, just disable this
+* optimization entirely for ARM and arm64.
+*/
+   return false;
 #else
return true;
 #endif
-- 
2.19.1




[PATCH] drm/v3d: Fix calling drm_sched_resubmit_jobs for same sched.

2019-03-12 Thread Andrey Grodzovsky
Also stop calling drm_sched_increase_karma multiple times.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/v3d/v3d_sched.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 4704b2d..ce7c737b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -231,20 +231,17 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
	mutex_lock(&v3d->reset_lock);
 
	/* block scheduler */
-	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
-
-		drm_sched_stop(sched);
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_stop(&v3d->queue[q].sched);
 
-		if(sched_job)
-			drm_sched_increase_karma(sched_job);
-	}
+	if(sched_job)
+		drm_sched_increase_karma(sched_job);
 
	/* get the GPU back into the init state */
	v3d_reset(v3d);
 
	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_resubmit_jobs(sched_job->sched);
+		drm_sched_resubmit_jobs(&v3d->queue[q].sched);
 
/* Unblock schedulers and restart their jobs. */
for (q = 0; q < V3D_MAX_QUEUES; q++) {
-- 
2.7.4


Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-03-12 Thread Michel Dänzer
On 2019-03-12 3:26 p.m., Koenig, Christian wrote:
> On 12.03.19 at 14:47, Michel Dänzer wrote:
>> On 2019-02-05 6:40 p.m., Michel Dänzer wrote:
>>> FWIW, I've hit this twice now today, whereas I don't remember ever
>>> hitting it before (not 100% sure though).
>>>
>>> I reverted the remaining hunk of the "cleanup setting bulk_movable"
>>> change, and it survived a piglit run. Could just be luck, though...
>> I'd been running with that revert for the last month without hitting the
>> problem. Today I tried without the revert, and promptly hit it again.
>> Seems more than just luck.
> 
> Mhm, and the only thing you reverted was the "cleanup setting 
> bulkmovable" change?

Yes, specifically, the only hunk of that which hasn't been reverted yet:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a45ca5d2cfe9..c25b06121e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -693,6 +693,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r = 0;

+	vm->bulk_moveable &= list_empty(&vm->evicted);
+
	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer

Re: Slow memory access when using OpenCL without X11

2019-03-12 Thread Kuehling, Felix
[adding the list back]

I'd suspect a problem related to memory clock. This is an APU where 
system memory is shared with the CPU, so if the SMU changes memory 
clocks that would affect CPU memory access performance. If the problem 
only occurs when OpenCL is running, then the compute power profile could 
have an effect here.

Laurie, can you monitor the clocks during your tests using rocm-smi?

Regards,
   Felix

On 2019-03-11 1:15 p.m., Tom St Denis wrote:
> Hi Lauri,
>
> I don't have ROCm installed locally (not on that team at AMD) but I 
> can rope in some of the KFD folk and see what they say :-).
>
> (in the meantime I should look into installing the ROCm stack on my 
> Ubuntu disk for experimentation...).
>
> Only other thing that comes to mind is some sort of stutter due to 
> power/clock gating (or gfx off/etc).  But that typically affects the 
> display/gpu side not the CPU side.
>
> Felix:  Any known issues with Raven and ROCm interacting over memory 
> bus performance?
>
> Tom
>
> On Mon, Mar 11, 2019 at 12:56 PM Lauri Ehrenpreis wrote:
>
> Hi!
>
> The 100x memory slowdown is hard to believe indeed. I attached the
> test program with my first e-mail which depends only on
> rocm-opencl-dev package. Would you mind compiling it and checking
> if it slows down memory for you as well?
>
> steps:
> 1) g++ cl_slow_test.cpp -o cl_slow_test -I
> /opt/rocm/opencl/include/ -L /opt/rocm/opencl/lib/x86_64/  -lOpenCL
> 2) logout from desktop env and disconnect hdmi/diplayport etc
> 3) log in over ssh
> 4) run the program ./cl_slow_test 1
>
> For me it reproduced even without step 2 as well but less
> reliably. Moving the mouse, for example, could make the memory speed
> fast again.
>
> --
> Lauri
>
>
>
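
A minimal C version of the reproduction above might look like this (a sketch,
not the attached cl_slow_test.cpp: it only opens an OpenCL context and then
times a plain host-side copy, which is the access that reportedly slows down):

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int main(void)
{
	cl_platform_id plat;
	cl_device_id dev;
	cl_context ctx;
	size_t sz = 256 << 20; /* 256 MiB */
	char *src = malloc(sz), *dst = malloc(sz);
	struct timespec t0, t1;
	double secs;

	/* Merely holding a GPU context open is said to trigger the issue. */
	clGetPlatformIDs(1, &plat, NULL);
	clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
	ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, NULL);

	memset(src, 1, sz);
	clock_gettime(CLOCK_MONOTONIC, &t0);
	memcpy(dst, src, sz);
	clock_gettime(CLOCK_MONOTONIC, &t1);

	secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("host memcpy: %.2f GB/s\n", sz / secs / 1e9);

	clReleaseContext(ctx);
	free(src);
	free(dst);
	return 0;
}

Built analogously to step 1 above, e.g. gcc cl_slow.c -o cl_slow -I
/opt/rocm/opencl/include/ -L /opt/rocm/opencl/lib/x86_64/ -lOpenCL.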
> On Mon, Mar 11, 2019 at 6:33 PM Tom St Denis wrote:
>
> Hi Lauri,
>
> There's really no connection between the two other than they
> run in the same package.  I too run a 2400G (as my
> workstation) and I got the same ~6.6GB/sec transfer rate but
> without a CL app running ...  The only logical reason is your
> CL app is bottlenecking the APUs memory bus but you claim
> "simply opening a context is enough" so something else is
> going on.
>
> Your last reply though says "with it running in the
> background" so it's entirely possible the CPU isn't busy but
> the package memory controller (shared between both the CPU and
> GPU) is busy.  For instance running xonotic in a 1080p window
> on my 4K display reduced the memory test to 5.8GB/sec and
> that's hardly a heavy memory bound GPU app.
>
> The only other possible connection is the GPU is generating so
> much heat that it's throttling the package which is also
> unlikely if you have a proper HSF attached (I use the ones
> that came in the retail boxes).
>
> Cheers,
> Tom
>

Re: [PATCH 2/3] drm/amdgpu: free up the first paging queue

2019-03-12 Thread Alex Deucher
On Tue, Mar 12, 2019 at 8:37 AM Christian König
 wrote:
>
> We need the first paging queue to handle page faults.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 20 
>  1 file changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 3ac5abe937f4..bed18e7bbc36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -2266,7 +2266,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
>  {
> 	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
> 	if (adev->sdma.has_page_queue)
> -		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
> +		adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page;

Maybe add something like:

	if (adev->sdma.num_instances > 1)
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page;
	else
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;

> 	else
> 		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
>  }
> @@ -2285,15 +2285,19 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
> 	unsigned i;
>
> 	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
> -	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		if (adev->sdma.has_page_queue)
> -			sched = &adev->sdma.instance[i].page.sched;
> -		else
> -			sched = &adev->sdma.instance[i].ring.sched;
> -		adev->vm_manager.vm_pte_rqs[i] =
> +	if (adev->sdma.has_page_queue) {
> +		sched = &adev->sdma.instance[1].page.sched;
> +		adev->vm_manager.vm_pte_rqs[0] =
> 			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
> +		adev->vm_manager.vm_pte_num_rqs = 1;
> +	} else {
> +		for (i = 0; i < adev->sdma.num_instances; i++) {
> +			sched = &adev->sdma.instance[i].ring.sched;
> +			adev->vm_manager.vm_pte_rqs[i] =
> +				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
> +   }
> +   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
> }
> -   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
>  }
>
>  const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
> --
> 2.17.1
>

Re: [PATCH 2/3] drm/amdgpu: free up the first paging queue

2019-03-12 Thread Deucher, Alexander
I don't think Raven has a paging queue in the first place.

Alex

From: amd-gfx  on behalf of Kuehling, 
Felix 
Sent: Tuesday, March 12, 2019 11:29 AM
To: Christian König; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/3] drm/amdgpu: free up the first paging queue

I think this would break Raven, which only has one SDMA engine.

Regards,
  Felix

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Tuesday, March 12, 2019 8:38 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 2/3] drm/amdgpu: free up the first paging queue

We need the first paging queue to handle page faults.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3ac5abe937f4..bed18e7bbc36 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2266,7 +2266,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 {
	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
	if (adev->sdma.has_page_queue)
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+		adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page;
	else
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
@@ -2285,15 +2285,19 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (adev->sdma.has_page_queue)
-			sched = &adev->sdma.instance[i].page.sched;
-		else
-			sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
+	if (adev->sdma.has_page_queue) {
+		sched = &adev->sdma.instance[1].page.sched;
+		adev->vm_manager.vm_pte_rqs[0] =
			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_num_rqs = 1;
+	} else {
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+   }
+   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 }
-   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 }

 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
--
2.17.1


RE: [PATCH 2/3] drm/amdgpu: free up the first paging queue

2019-03-12 Thread Kuehling, Felix
I think this would break Raven, which only has one SDMA engine.

Regards,
  Felix
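
A sketch of the guard this concern implies (illustrative only; the helper
name is made up, and as noted elsewhere in the thread Raven does not set
has_page_queue, so this only matters if a single-SDMA part ever does):

static struct amdgpu_ring *sdma_v4_0_page_ring(struct amdgpu_device *adev)
{
	/* Reserve instance 0 for page-fault handling when more than one
	 * SDMA engine exists; otherwise fall back to the only instance. */
	int i = adev->sdma.num_instances > 1 ? 1 : 0;

	return &adev->sdma.instance[i].page;
}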

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Tuesday, March 12, 2019 8:38 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 2/3] drm/amdgpu: free up the first paging queue

We need the first paging queue to handle page faults.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3ac5abe937f4..bed18e7bbc36 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2266,7 +2266,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 {
	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
	if (adev->sdma.has_page_queue)
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+		adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page;
	else
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
@@ -2285,15 +2285,19 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (adev->sdma.has_page_queue)
-			sched = &adev->sdma.instance[i].page.sched;
-		else
-			sched = &adev->sdma.instance[i].ring.sched;
-		adev->vm_manager.vm_pte_rqs[i] =
+	if (adev->sdma.has_page_queue) {
+		sched = &adev->sdma.instance[1].page.sched;
+		adev->vm_manager.vm_pte_rqs[0] =
			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		adev->vm_manager.vm_pte_num_rqs = 1;
+	} else {
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+   }
+   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
-   adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
--
2.17.1


RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Russell, Kent
Peculiar, I hit it immediately when I ran it. Can you try using
--gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.

 Kent

> -Original Message-
> From: Christian König 
> Sent: Tuesday, March 12, 2019 11:09 AM
> To: Russell, Kent ; Koenig, Christian
> ; Kuehling, Felix ;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Yeah, same problem here.
> 
> I removed the libhsakmt package and installed it manually and now it seems to
> work.
> 
> Doing some testing now, but at least offhand I can't seem to reproduce the
> VM fault on a Vega10.
> 
> Christian.
> 
> On 12.03.19 at 16:01, Russell, Kent wrote:
> > Oh right, I remember that issue. I had that happen to me once, where my
> installed libhsakmt didn't match up with the latest source code, so I ended up
> having to remove the libhsakmt package and point it to the folders
> instead.
> >
> >   Kent
> >
> >> -Original Message-
> >> From: Koenig, Christian
> >> Sent: Tuesday, March 12, 2019 10:49 AM
> >> To: Russell, Kent ; Kuehling, Felix
> >> ; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Yeah, the problem is I do have the libhsakmt installed.
> >>
> >> Going to give it a try to specify the directory directly.
> >>
> >> Christian.
> >>
> >> On 12.03.19 at 15:47, Russell, Kent wrote:
> >>> The README.txt file inside the tests/kfdtest folder has instructions
> >>> on how
> >> to do it if you don't have the libhsakmt package installed on your system:
> >>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
> >>> headers and libraries are searched under LIBHSAKMT_PATH/include and
> >>> LIBHSAKMT_PATH/lib respectively.
> >>>
> >>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
> >> containing include, src, tests, etc), then that should cover it.
> >>>Kent
> >>>
> >>>
>  -Original Message-
>  From: Christian König 
>  Sent: Tuesday, March 12, 2019 9:13 AM
>  To: Russell, Kent ; Kuehling, Felix
>  ; Koenig, Christian
>  ; amd-gfx@lists.freedesktop.org
>  Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> 
>  Hi guys,
> 
>  so found a few minutes today to compile kfdtest.
> 
>  Problem is that during the compile I get a lot of this:
> > CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
> > »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> > /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> > warning: undefined reference to »hsaKmtCreateQueue«
>  Any idea?
> 
>  Christian.
> 
>  On 11.03.19 at 17:55, Christian König wrote:
> > Hi guys,
> >
> > well it's most likely some missing handling in the KFD, so I'm
> > rather reluctant to revert the change immediately.
> >
> > Problem is that I don't have time right now to look into it
> > immediately. So Kent can you continue to take a look?
> >
> > Sounds like it's crashing immediately, so it should be something
> obvious.
> >
> > Christian.
> >
> > On 11.03.19 at 10:49, Russell, Kent wrote:
> >>    From what I've been able to dig through, the VM Fault seems to
> >> occur right after a doorbell mmap, but that's as far as I got. I
> >> can try to revert it in today's merge and see how things go.
> >>
> >>     Kent
> >>
> >>> -Original Message-
> >>> From: Kuehling, Felix
> >>> Sent: Friday, March 08, 2019 11:16 PM
> >>> To: Koenig, Christian ; Russell, Kent
> >>> ; amd-gfx@lists.freedesktop.org
> >>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >> demand
> >>> My concerns were related to eviction fence handing. It would
> >>> manifest by unnecessary eviction callbacks into KFD that aren't
> >>> cause by real evictions. I addressed that with a previous patch
> >>> series that removed the need to remove eviction fences and add
> >>> them back around page table updates in
> amdgpu_amdkfd_gpuvm.c.
> >>>
> >>> I don't know what's going on here. I can probably take a look on
> >>> Monday. I haven't considered what changed with respect to PD
> >>> updates.
> >>>
> >>> Kent, can we temporarily revert the offending change in
> >>> amd-kfd-staging just to unblock the merge?
> >>>
> >>> Christian, I think KFD is currently broken on amd-staging-drm-next.
> >>> If we're
> >>> serious about supporting KFD upstream, you may also want to
> >>> consider reverting your change there for now. Also consider
> >>> building the Thunk and kfdtest so you can do quick smoke tests
> >>> locally whenever you make amdgpu_vm changes that can affect
> KFD.
> >>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >>>
> >>> Regards,
> >>>      Felix
> >>>
> >>> 

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Christian König

Yeah, same problem here.

I removed the libhsakmt package and installed it manually and now it seems 
to work.


Doing some testing now, but at least offhand I can't seem to reproduce 
the VM fault on a Vega10.


Christian.

On 12.03.19 at 16:01, Russell, Kent wrote:

Oh right, I remember that issue. I had that happen to me once, where my 
installed libhsakmt didn't match up with the latest source code, so I ended up 
having to remove the libhsakmt package and point it to the folders instead.

  Kent


-Original Message-
From: Koenig, Christian
Sent: Tuesday, March 12, 2019 10:49 AM
To: Russell, Kent ; Kuehling, Felix
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

Yeah, the problem is I do have the libhsakmt installed.

Going to give it a try to specify the directory directly.

Christian.

Am 12.03.19 um 15:47 schrieb Russell, Kent:

The README.txt file inside the tests/kfdtest folder has instructions on how
to do it if you don't have the libhsakmt package installed on your system:

export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
headers and libraries are searched under LIBHSAKMT_PATH/include and
LIBHSAKMT_PATH/lib respectively.

So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
containing include, src, tests, etc), then that should cover it.

   Kent



-Original Message-
From: Christian König 
Sent: Tuesday, March 12, 2019 9:13 AM
To: Russell, Kent ; Kuehling, Felix
; Koenig, Christian
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

Hi guys,

so found a few minutes today to compile kfdtest.

Problem is that during the compile I get a lot of this:

CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
»BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
/usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
warning: undefined reference to »hsaKmtCreateQueue«

Any idea?

Christian.

On 11.03.19 at 17:55, Christian König wrote:

Hi guys,

well it's most likely some missing handling in the KFD, so I'm
rather reluctant to revert the change immediately.

Problem is that I don't have time right now to look into it
immediately. So Kent can you continue to take a look?

Sounds like it's crashing immediately, so it should be something obvious.

Christian.

On 11.03.19 at 10:49, Russell, Kent wrote:

   From what I've been able to dig through, the VM Fault seems to
occur right after a doorbell mmap, but that's as far as I got. I
can try to revert it in today's merge and see how things go.

    Kent


-Original Message-
From: Kuehling, Felix
Sent: Friday, March 08, 2019 11:16 PM
To: Koenig, Christian ; Russell, Kent
; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on

demand

My concerns were related to eviction fence handing. It would
manifest by unnecessary eviction callbacks into KFD that aren't
cause by real evictions. I addressed that with a previous patch
series that removed the need to remove eviction fences and add
them back around page table updates in amdgpu_amdkfd_gpuvm.c.

I don't know what's going on here. I can probably take a look on
Monday. I haven't considered what changed with respect to PD
updates.

Kent, can we temporarily revert the offending change in
amd-kfd-staging just to unblock the merge?

Christian, I think KFD is currently broken on amd-staging-drm-next.
If we're
serious about supporting KFD upstream, you may also want to
consider reverting your change there for now. Also consider
building the Thunk and kfdtest so you can do quick smoke tests
locally whenever you make amdgpu_vm changes that can affect KFD.
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface

Regards,
     Felix

-Original Message-
From: amd-gfx  On Behalf

Of

Christian König
Sent: Friday, March 08, 2019 9:14 AM
To: Russell, Kent ;
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on

demand

My best guess is that we forget somewhere to update the PDs. What
hardware is that on?

Felix already mentioned that this could be problematic for the KFD.

Maybe he has an idea,
Christian.

On 08.03.19 at 15:04, Russell, Kent wrote:

Hi Christian,

This patch ended up causing a VM Fault in KFDTest. Reverting just
this

patch addressed the issue:

[   82.703503] amdgpu :0c:00.0: GPU fault detected: 146
0x480c for

process  pid 0 thread  pid 0

[   82.703512] amdgpu :0c:00.0:

VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000

[   82.703516] amdgpu :0c:00.0:

VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C

[   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid
32769) at

page 4096, read from 'TC0' (0x54433000) (72)

[   82.703585] Evicting PASID 32769 queues

I am looking into it, but if you have any insight that would be
great in

helping to resolve it quickly.

     Kent

-Original Message-
From: amd-gfx  On


RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Russell, Kent
Oh right, I remember that issue. I had that happen to me once, where my 
installed libhsakmt didn't match up with the latest source code, so I ended up 
having to remove the libhsakmt package and point it to the folders instead. 

 Kent

> -Original Message-
> From: Koenig, Christian
> Sent: Tuesday, March 12, 2019 10:49 AM
> To: Russell, Kent ; Kuehling, Felix
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Yeah, the problem is I do have the libhsakmt installed.
> 
> Going to give it a try to specify the directory directly.
> 
> Christian.
> 
> On 12.03.19 at 15:47, Russell, Kent wrote:
> > The README.txt file inside the tests/kfdtest folder has instructions on how
> to do it if you don't have the libhsakmt package installed on your system:
> >
> > export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
> > headers and libraries are searched under LIBHSAKMT_PATH/include and
> > LIBHSAKMT_PATH/lib respectively.
> >
> > So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
> containing include, src, tests, etc), then that should cover it.
> >
> >   Kent
> >
> >
> >> -Original Message-
> >> From: Christian König 
> >> Sent: Tuesday, March 12, 2019 9:13 AM
> >> To: Russell, Kent ; Kuehling, Felix
> >> ; Koenig, Christian
> >> ; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Hi guys,
> >>
> >> so found a few minutes today to compile kfdtest.
> >>
> >> Problem is that during the compile I get a lot of this:
> >>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
> >>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> >>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> >>> warning: undefined reference to »hsaKmtCreateQueue«
> >> Any idea?
> >>
> >> Christian.
> >>
> >> On 11.03.19 at 17:55, Christian König wrote:
> >>> Hi guys,
> >>>
> >>> well it's most likely some missing handling in the KFD, so I'm
> >>> rather reluctant to revert the change immediately.
> >>>
> >>> Problem is that I don't have time right now to look into it
> >>> immediately. So Kent can you continue to take a look?
> >>>
> >>> Sounds like it's crashing immediately, so it should be something obvious.
> >>>
> >>> Christian.
> >>>
> >>> On 11.03.19 at 10:49, Russell, Kent wrote:
>    From what I've been able to dig through, the VM Fault seems to
>  occur right after a doorbell mmap, but that's as far as I got. I
>  can try to revert it in today's merge and see how things go.
> 
>     Kent
> 
> > -Original Message-
> > From: Kuehling, Felix
> > Sent: Friday, March 08, 2019 11:16 PM
> > To: Koenig, Christian ; Russell, Kent
> > ; amd-gfx@lists.freedesktop.org
> > Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >
> > My concerns were related to eviction fence handing. It would
> > manifest by unnecessary eviction callbacks into KFD that aren't
> > cause by real evictions. I addressed that with a previous patch
> > series that removed the need to remove eviction fences and add
> > them back around page table updates in amdgpu_amdkfd_gpuvm.c.
> >
> > I don't know what's going on here. I can probably take a look on
> > Monday. I haven't considered what changed with respect to PD
> > updates.
> >
> > Kent, can we temporarily revert the offending change in
> > amd-kfd-staging just to unblock the merge?
> >
> > Christian, I think KFD is currently broken on amd-staging-drm-next.
> > If we're
> > serious about supporting KFD upstream, you may also want to
> > consider reverting your change there for now. Also consider
> > building the Thunk and kfdtest so you can do quick smoke tests
> > locally whenever you make amdgpu_vm changes that can affect KFD.
> > https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >
> > Regards,
> >     Felix
> >
> > -Original Message-
> > From: amd-gfx  On Behalf
> Of
> > Christian König
> > Sent: Friday, March 08, 2019 9:14 AM
> > To: Russell, Kent ;
> > amd-gfx@lists.freedesktop.org
> > Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >
> > My best guess is that we forget somewhere to update the PDs. What
> > hardware is that on?
> >
> > Felix already mentioned that this could be problematic for the KFD.
> >
> > Maybe he has an idea,
> > Christian.
> >
> > On 08.03.19 at 15:04, Russell, Kent wrote:
> >> Hi Christian,
> >>
> >> This patch ended up causing a VM Fault in KFDTest. Reverting just
> >> this
> > patch addressed the issue:
> >> [   82.703503] amdgpu :0c:00.0: GPU fault detected: 146
> >> 0x480c for
> > process  pid 0 thread  pid 0
> >> [   82.703512] amdgpu :0c:00.0:
> 

Re: [PATCH v6 1/2] drm/sched: Refactor ring mirror list handling.

2019-03-12 Thread Grodzovsky, Andrey

On 3/12/19 3:43 AM, Tomeu Vizoso wrote:
> On Thu, 27 Dec 2018 at 20:28, Andrey Grodzovsky
>  wrote:
>> Decouple sched threads stop and start and ring mirror
>> list handling from the policy of what to do about the
>> guilty jobs.
>> When stopping the sched thread and detaching sched fences
>> from non-signaled HW fences wait for all signaled HW fences
>> to complete before rerunning the jobs.
>>
>> v2: Fix resubmission of guilty job into HW after refactoring.
>>
>> v4:
>> Full restart for all the jobs, not only from guilty ring.
>> Extract karma increase into standalone function.
>>
>> v5:
>> Rework waiting for signaled jobs without relying on the job
>> struct itself as those might already be freed for non 'guilty'
>> job's schedulers.
>> Expose karma increase to drivers.
>>
>> v6:
>> Use list_for_each_entry_safe_continue and drm_sched_process_job
>> in case fence already signaled.
>> Call drm_sched_increase_karma only once for amdgpu and add documentation.
>>
>> Suggested-by: Christian Koenig 
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  20 ++-
>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c|  11 +-
>>   drivers/gpu/drm/scheduler/sched_main.c | 195 +++--
>>   drivers/gpu/drm/v3d/v3d_sched.c|  12 +-
>>   include/drm/gpu_scheduler.h|   8 +-
>>   5 files changed, 157 insertions(+), 89 deletions(-)
>>
> [snip]
>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
>> index 445b2ef..f76d9ed 100644
>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>> @@ -178,18 +178,22 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
>>  for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>  struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
>>
>> -   kthread_park(sched->thread);
>> -   drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
>> +   drm_sched_stop(sched, (sched_job->sched == sched ?
>> sched_job : NULL));
>> +
>> +   if(sched_job)
>> +   drm_sched_increase_karma(sched_job);
>>  }
>>
>>  /* get the GPU back into the init state */
>>  v3d_reset(v3d);
>>
>> +   for (q = 0; q < V3D_MAX_QUEUES; q++)
>> +   drm_sched_resubmit_jobs(sched_job->sched);
> Hi Andrey,
>
> I'm not sure of what was the original intent, but I guess it wasn't to
> repeatedly call resubmit_jobs on that specific job's queue?
>
> Regards,
>
> Tomeu

My bad, there is also another mistake here: increasing karma for
the guilty job's entity multiple times. I will fix that. Thanks for
pointing it out.

Andrey
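
The corrected pattern, as in Andrey's follow-up v3d patch elsewhere in this
digest: karma is raised once, outside the loop, and each queue resubmits on
its own scheduler:

	if (sched_job)
		drm_sched_increase_karma(sched_job);

	/* get the GPU back into the init state */
	v3d_reset(v3d);

	for (q = 0; q < V3D_MAX_QUEUES; q++)
		drm_sched_resubmit_jobs(&v3d->queue[q].sched);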



Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Koenig, Christian
Yeah, the problem is I do have the libhsakmt installed.

Going to give it a try to specify the directory directly.

Christian.

On 12.03.19 at 15:47, Russell, Kent wrote:
> The README.txt file inside the tests/kfdtest folder has instructions on how 
> to do it if you don't have the libhsakmt package installed on your system:
>
> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/
> With that, the headers and libraries are searched under
> LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.
>
> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one 
> containing include, src, tests, etc), then that should cover it.
>
>   Kent
>
>
>> -Original Message-
>> From: Christian König 
>> Sent: Tuesday, March 12, 2019 9:13 AM
>> To: Russell, Kent ; Kuehling, Felix
>> ; Koenig, Christian
>> ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Hi guys,
>>
>> so found a few minutes today to compile kfdtest.
>>
>> Problem is that during the compile I get a lot of this:
>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>> warning: undefined reference to »hsaKmtCreateQueue«
>> Any idea?
>>
>> Christian.
>>
>> On 11.03.19 at 17:55, Christian König wrote:
>>> Hi guys,
>>>
>>> well it's most likely some missing handling in the KFD, so I'm rather
>>> reluctant to revert the change immediately.
>>>
>>> Problem is that I don't have time right now to look into it
>>> immediately. So Kent can you continue to take a look?
>>>
>>> Sounds like it's crashing immediately, so it should be something obvious.
>>>
>>> Christian.
>>>
>>> On 11.03.19 at 10:49, Russell, Kent wrote:
   From what I've been able to dig through, the VM Fault seems to occur
 right after a doorbell mmap, but that's as far as I got. I can try to
 revert it in today's merge and see how things go.

    Kent

> -Original Message-
> From: Kuehling, Felix
> Sent: Friday, March 08, 2019 11:16 PM
> To: Koenig, Christian ; Russell, Kent
> ; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>
> My concerns were related to eviction fence handling. They would
> manifest as unnecessary eviction callbacks into KFD that aren't
> caused by real evictions. I addressed that with a previous patch
> series that removed the need to remove eviction fences and add them
> back around page table updates in amdgpu_amdkfd_gpuvm.c.
>
> I don't know what's going on here. I can probably take a look on
> Monday. I haven't considered what changed with respect to PD
> updates.
>
> Kent, can we temporarily revert the offending change in
> amd-kfd-staging just to unblock the merge?
>
> Christian, I think KFD is currently broken on amd-staging-drm-next.
> If we're
> serious about supporting KFD upstream, you may also want to consider
> reverting your change there for now. Also consider building the
> Thunk and kfdtest so you can do quick smoke tests locally whenever
> you make amdgpu_vm changes that can affect KFD.
> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>
> Regards,
>     Felix
>
> -Original Message-
> From: amd-gfx  On Behalf Of
> Christian König
> Sent: Friday, March 08, 2019 9:14 AM
> To: Russell, Kent ;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>
> My best guess is that we forget somewhere to update the PDs. What
> hardware is that on?
>
> Felix already mentioned that this could be problematic for the KFD.
>
> Maybe he has an idea,
> Christian.
>
> On 08.03.19 at 15:04, Russell, Kent wrote:
>> Hi Christian,
>>
>> This patch ended up causing a VM Fault in KFDTest. Reverting just
>> this
> patch addressed the issue:
>> [   82.703503] amdgpu :0c:00.0: GPU fault detected: 146
>> 0x480c for
> process  pid 0 thread  pid 0
>> [   82.703512] amdgpu :0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000
>> [   82.703516] amdgpu :0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>> [   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid
>> 32769) at
> page 4096, read from 'TC0' (0x54433000) (72)
>> [   82.703585] Evicting PASID 32769 queues
>>
>> I am looking into it, but if you have any insight that would be
>> great in
> helping to resolve it quickly.
>>     Kent
>>> -Original Message-
>>> From: amd-gfx  On Behalf
>> Of
>>> Christian König
>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>> To: amd-gfx@lists.freedesktop.org
>>> Subject: [PATCH 3/6] drm/amdgpu: 

RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Russell, Kent
The README.txt file inside the tests/kfdtest folder has instructions on how to 
do it if you don't have the libhsakmt package installed on your system:

export LIBHSAKMT_PATH=/*your local libhsakmt folder*/
With that, the headers and libraries are searched under
LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.

So if you export LIBHSAKMT_PATH as the root ROCT folder (the one containing
include, src, tests, etc.), that should cover it.

 Kent


> -Original Message-
> From: Christian König 
> Sent: Tuesday, March 12, 2019 9:13 AM
> To: Russell, Kent ; Kuehling, Felix
> ; Koenig, Christian
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Hi guys,
> 
> so I found a few minutes today to compile kfdtest.
> 
> The problem is that during the compile I get a lot of this:
> > CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
> > »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> > /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> > warning: undefined reference to »hsaKmtCreateQueue«
> 
> Any idea?
> 
> Christian.
> 
> > On 11.03.19 at 17:55, Christian König wrote:
> > Hi guys,
> >
> > well it's most likely some missing handling in the KFD, so I'm rather
> > reluctant to revert the change immediately.
> >
> > Problem is that I don't have time right now to look into it
> > immediately. So Kent can you continue to take a look?
> >
> > Sounds like it's crashing immediately, so it should be something obvious.
> >
> > Christian.
> >
> > On 11.03.19 at 10:49, Russell, Kent wrote:
> >>  From what I've been able to dig through, the VM Fault seems to occur
> >> right after a doorbell mmap, but that's as far as I got. I can try to
> >> revert it in today's merge and see how things go.
> >>
> >>   Kent
> >>
> >>> -Original Message-
> >>> From: Kuehling, Felix
> >>> Sent: Friday, March 08, 2019 11:16 PM
> >>> To: Koenig, Christian ; Russell, Kent
> >>> ; amd-gfx@lists.freedesktop.org
> >>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>>
> >>> My concerns were related to eviction fence handling. They would
> >>> manifest as unnecessary eviction callbacks into KFD that aren't
> >>> caused by real evictions. I addressed that with a previous patch
> >>> series that removed the need to remove eviction fences and add them
> >>> back around page table updates in amdgpu_amdkfd_gpuvm.c.
> >>>
> >>> I don't know what's going on here. I can probably take a look on
> >>> Monday. I haven't considered what changed with respect to PD
> >>> updates.
> >>>
> >>> Kent, can we temporarily revert the offending change in
> >>> amd-kfd-staging just to unblock the merge?
> >>>
> >>> Christian, I think KFD is currently broken on amd-staging-drm-next.
> >>> If we're
> >>> serious about supporting KFD upstream, you may also want to consider
> >>> reverting your change there for now. Also consider building the
> >>> Thunk and kfdtest so you can do quick smoke tests locally whenever
> >>> you make amdgpu_vm changes that can affect KFD.
> >>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >>>
> >>> Regards,
> >>>    Felix
> >>>
> >>> -Original Message-
> >>> From: amd-gfx  On Behalf Of
> >>> Christian König
> >>> Sent: Friday, March 08, 2019 9:14 AM
> >>> To: Russell, Kent ;
> >>> amd-gfx@lists.freedesktop.org
> >>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>>
> >>> My best guess is that we forget somewhere to update the PDs. What
> >>> hardware is that on?
> >>>
> >>> Felix already mentioned that this could be problematic for the KFD.
> >>>
> >>> Maybe he has an idea,
> >>> Christian.
> >>>
> On 08.03.19 at 15:04, Russell, Kent wrote:
>  Hi Christian,
> 
>  This patch ended up causing a VM Fault in KFDTest. Reverting just
>  this
> >>> patch addressed the issue:
>  [   82.703503] amdgpu :0c:00.0: GPU fault detected: 146
>  0x480c for
> >>> process  pid 0 thread  pid 0
>  [   82.703512] amdgpu :0c:00.0:
> >>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000
>  [   82.703516] amdgpu :0c:00.0:
> >>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>  [   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid
>  32769) at
> >>> page 4096, read from 'TC0' (0x54433000) (72)
>  [   82.703585] Evicting PASID 32769 queues
> 
>  I am looking into it, but if you have any insight that would be
>  great in
> >>> helping to resolve it quickly.
>     Kent
> > -Original Message-
> > From: amd-gfx  On Behalf
> Of
> > Christian König
> > Sent: Tuesday, February 26, 2019 7:47 AM
> > To: amd-gfx@lists.freedesktop.org
> > Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >
> > Let's start to allocate VM PDs/PTs on demand instead of
> > pre-allocating them during mapping.
> >
> > Signed-off-by: Christian König 
> > 

Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Alex Deucher
On Tue, Mar 12, 2019 at 10:19 AM Harinath Reddy  wrote:
>
> If I am not misunderstanding your information: if I want to make use of
> xf86-video-amdgpu (the UMD), which is capable of driving GCN-core cards,
> on HP-UX, I should write an HP-UX-compatible DRM-based kernel module.

Correct.

Alex

>
>
> Thanks,
> Harinath
>
>
>
> On Tue, Mar 12, 2019 at 7:37 PM Alex Deucher  wrote:
>>
>> All drivers for recent hardware require a proper kernel driver.  There
>> have not been user mode X drivers for new AMD hardware released in the
>> last 7-8 years.
>>
>> Alex
>>
>> On Tue, Mar 12, 2019 at 10:02 AM Harinath Reddy  
>> wrote:
>> >
>> > Hope it is different from xorg-video-radeon/hd  driver which is not 
>> > require any kernel component to drive the FireMV2250 card, except card 
>> > initialization( ATOMBIOS run from the HP-UX kernel on behalf of the xf86 
>> > request ).
>> >
>> > If it is tightly coupled with amdgpu  kernel module, and it is again 
>> > depend on the linux kernel drm module. HP-UX is not using the drm 
>> > facilities as i am aware , so is that possible to port to hp-ux.
>> >
>> >
>> > Thanks and Regards,
>> > Harinath
>> >
>> > On Tue, Mar 12, 2019 at 2:37 PM Christian König 
>> >  wrote:
>> >>
>> >> On 12.03.19 at 09:35, Michel Dänzer wrote:
>> >> > On 2019-03-12 4:14 a.m., Harinath Reddy wrote:
>> >> >> Hi,
>> >> >>
>> >> >> At present we are using the radeon HD driver from xorg to drive the
>> >> >> AMD FireMV 2250 card. We would like to replace the AMD FireMV 2250
>> >> >> with an AMD Radeon Pro WX 2100 card.
>> >> >>
>> >> >> I saw that the xf86-video-amdgpu xorg driver is available, but we are
>> >> >> not sure whether that would be sufficient or not; if not, what else
>> >> >> is required to use that card? If you can help, that would be great.
>> >> >> I am new to the graphics world.
>> >> >>
>> >> >> We are using the HP-UX operating system, with Xserver (XFree86) as
>> >> >> the graphics server.
>> >> > xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the latter
>> >> > is working with your card, the former will work (as well as the generic
>> >> > modesetting Xorg driver).
>> >>
>> >> To answer the original question a bit more directly: the amdgpu kernel
>> >> driver is NOT available for HP-UX.
>> >>
>> >> Regards,
>> >> Christian.
>> >

Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-03-12 Thread Koenig, Christian
On 12.03.19 at 14:47, Michel Dänzer wrote:
> On 2019-02-05 6:40 p.m., Michel Dänzer wrote:
>> FWIW, I've hit this twice now today, whereas I don't remember ever
>> hitting it before (not 100% sure though).
>>
>> I reverted the remaining hunk of the "cleanup setting bulk_movable"
>> change, and it survived a piglit run. Could just be luck, though...
> I'd been running with that revert for the last month without hitting the
> problem. Today I tried without the revert, and promptly hit it again.
> Seems more than just luck.

Mhm, and the only thing you reverted was the "cleanup setting
bulk_movable" change?

Christian.

Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Harinath Reddy
If I am not misunderstanding your information: if I want to make use of
xf86-video-amdgpu (the UMD), which is capable of driving GCN-core cards,
on HP-UX, I should write an HP-UX-compatible DRM-based kernel module.


Thanks,
Harinath



On Tue, Mar 12, 2019 at 7:37 PM Alex Deucher  wrote:

> All drivers for recent hardware require a proper kernel driver.  There
> have not been user mode X drivers for new AMD hardware released in the
> last 7-8 years.
>
> Alex
>
> On Tue, Mar 12, 2019 at 10:02 AM Harinath Reddy 
> wrote:
> >
> > I hope it is different from the xorg radeon/HD driver, which does not
> > require any kernel component to drive the FireMV 2250 card, except card
> > initialization (ATOMBIOS is run from the HP-UX kernel on behalf of the
> > xf86 request).
> >
> > If it is tightly coupled with the amdgpu kernel module, it in turn
> > depends on the Linux kernel DRM module. As far as I am aware, HP-UX does
> > not provide the DRM facilities, so is it possible to port it to HP-UX?
> >
> >
> > Thanks and Regards,
> > Harinath
> >
> > On Tue, Mar 12, 2019 at 2:37 PM Christian König <
> ckoenig.leichtzumer...@gmail.com> wrote:
> >>
> >> Am 12.03.19 um 09:35 schrieb Michel Dänzer:
> >> > On 2019-03-12 4:14 a.m., Harinath Reddy wrote:
> >> >> Hi,
> >> >>
> >> >> At present we are using the radeon HD driver from xorg to drive the
> >> >> AMD FireMV 2250 card. We would like to replace the AMD FireMV 2250
> >> >> with an AMD Radeon Pro WX 2100 card.
> >> >>
> >> >> I saw that the xf86-video-amdgpu xorg driver is available, but we are
> >> >> not sure whether that would be sufficient or not; if not, what else is
> >> >> required to use that card? If you can help, that would be great. I am
> >> >> new to the graphics world.
> >> >>
> >> >> We are using the HP-UX operating system, with Xserver (XFree86) as the
> >> >> graphics server.
> >> > xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the
> latter
> >> > is working with your card, the former will work (as well as the
> generic
> >> > modesetting Xorg driver).
> >>
> >> To answer the original question a bit more directly: the amdgpu kernel
> >> driver is NOT available for HP-UX.
> >>
> >> Regards,
> >> Christian.
> >

Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Alex Deucher
All drivers for recent hardware require a proper kernel driver.  There
have not been user mode X drivers for new AMD hardware released in the
last 7-8 years.

Alex

On Tue, Mar 12, 2019 at 10:02 AM Harinath Reddy  wrote:
>
> I hope it is different from the xorg radeon/HD driver, which does not require
> any kernel component to drive the FireMV 2250 card, except card
> initialization (ATOMBIOS is run from the HP-UX kernel on behalf of the xf86
> request).
>
> If it is tightly coupled with the amdgpu kernel module, it in turn depends
> on the Linux kernel DRM module. As far as I am aware, HP-UX does not provide
> the DRM facilities, so is it possible to port it to HP-UX?
>
>
> Thanks and Regards,
> Harinath
>
> On Tue, Mar 12, 2019 at 2:37 PM Christian König 
>  wrote:
>>
>> On 12.03.19 at 09:35, Michel Dänzer wrote:
>> > On 2019-03-12 4:14 a.m., Harinath Reddy wrote:
>> >> Hi,
>> >>
>> >> At present we are using the radeon HD driver from xorg to drive the
>> >> AMD FireMV 2250 card. We would like to replace the AMD FireMV 2250 with
>> >> an AMD Radeon Pro WX 2100 card.
>> >>
>> >> I saw that the xf86-video-amdgpu xorg driver is available, but we are not
>> >> sure whether that would be sufficient or not; if not, what else is
>> >> required to use that card? If you can help, that would be great. I am new
>> >> to the graphics world.
>> >>
>> >> We are using the HP-UX operating system, with Xserver (XFree86) as the
>> >> graphics server.
>> > xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the latter
>> > is working with your card, the former will work (as well as the generic
>> > modesetting Xorg driver).
>>
>> To answer the original question a bit more directly: the amdgpu kernel
>> driver is NOT available for HP-UX.
>>
>> Regards,
>> Christian.
>

Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Harinath Reddy
I hope it is different from the xorg radeon/HD driver, which does not require
any kernel component to drive the FireMV 2250 card, except card
initialization (ATOMBIOS is run from the HP-UX kernel on behalf of the xf86
request).

If it is tightly coupled with the amdgpu kernel module, it in turn depends
on the Linux kernel DRM module. As far as I am aware, HP-UX does not provide
the DRM facilities, so is it possible to port it to HP-UX?


Thanks and Regards,
Harinath

On Tue, Mar 12, 2019 at 2:37 PM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> On 12.03.19 at 09:35, Michel Dänzer wrote:
> > On 2019-03-12 4:14 a.m., Harinath Reddy wrote:
> >> Hi,
> >>
> >> At present we are using the radeon HD driver from xorg to drive the
> >> AMD FireMV 2250 card. We would like to replace the AMD FireMV 2250 with
> >> an AMD Radeon Pro WX 2100 card.
> >>
> >> I saw that the xf86-video-amdgpu xorg driver is available, but we are not
> >> sure whether that would be sufficient or not; if not, what else is
> >> required to use that card? If you can help, that would be great. I am new
> >> to the graphics world.
> >>
> >> We are using the HP-UX operating system, with Xserver (XFree86) as the
> >> graphics server.
> > xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the latter
> > is working with your card, the former will work (as well as the generic
> > modesetting Xorg driver).
>
> To answer the original question a bit more directly: the amdgpu kernel
> driver is NOT available for HP-UX.
>
> Regards,
> Christian.
>

RE: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags

2019-03-12 Thread Deucher, Alexander
> -Original Message-
> From: Pan, Xinhui 
> Sent: Tuesday, March 12, 2019 6:14 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Quan, Evan
> ; Zhang, Hawking 
> Subject: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags
> 
> Add a ras post-init function to do some initialization after all IPs
> have finished their late init.
> 
> Add a new member, flags, which will control the ras workflow.
> For now, the vbios enables ras for us on boot. That might change in the
> future, so there should be a flag from the vbios telling us whether ras is
> enabled on boot. It looks like there is no such info now.
> 
> Other bits of the flags are reserved to control other parts of ras.
> 
> Signed-off-by: xinhui pan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 34
> +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  3 ++
>  3 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 29c44a2eabcf..95cd3b7886ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2707,6 +2707,9 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>   goto failed;
>   }
> 
> + /* must succeed. */
> + amdgpu_ras_post_init(adev);
> +
>   return 0;
> 
>  failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 10ce40d2c040..238b46c304cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -118,6 +118,11 @@ const char *ras_block_string[] = {  #define
> ras_err_str(i) (ras_error_string[ffs(i)])  #define ras_block_str(i)
> (ras_block_string[i])
> 
> +enum amdgpu_ras_flags {
> + AMDGPU_RAS_FLAG_INIT_BY_VBIOS = 1,
> +};
> +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> +

Move this enum to amdgpu_ras.h?  Although if you are going to be using this as 
flags, maybe defines would be better.
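
For instance, a sketch of the #define style being suggested here, reusing the names from the patch:

/* in amdgpu_ras.h: plain defines instead of an enum, since the values
 * are bits in a mask rather than an enumeration */
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS	(0x1 << 0)
/* further ras workflow-control bits would be reserved here */

#define RAS_DEFAULT_FLAGS	(AMDGPU_RAS_FLAG_INIT_BY_VBIOS)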

>  static void amdgpu_ras_self_test(struct amdgpu_device *adev)  {
>   /* TODO */
> @@ -1387,13 +1392,16 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
>   &con->supported);
>   con->features = 0;
>   INIT_LIST_HEAD(&con->head);
> + /* Might need to get this flag from vbios. */
> + con->flags = RAS_DEFAULT_FLAGS;
> 
>   if (amdgpu_ras_recovery_init(adev))
>   goto recovery_out;
> 
>   amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
> 
> - amdgpu_ras_enable_all_features(adev, 1);
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> + amdgpu_ras_enable_all_features(adev, 1);
> 
>   if (amdgpu_ras_fs_init(adev))
>   goto fs_out;
> @@ -1413,6 +1421,30 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
>   return -EINVAL;
>  }
> 
> +/* do some init work after IP late init as dependence */ void
> +amdgpu_ras_post_init(struct amdgpu_device *adev) {
> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> + struct ras_manager *obj, *tmp;
> +
> + if (!con)
> + return;
> +
> + /* We enable ras on all hw_supported blocks, but a boot parameter
> +  * might disable some of them, and one or more IPs may not have
> +  * implemented it yet. So we disable those on their behalf.
> +  */
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
> + list_for_each_entry_safe(obj, tmp, &con->head, node) {
> + if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
> + amdgpu_ras_feature_enable(adev, &obj->head, 0);
> + /* there should not be any references left. */
> + WARN_ON(alive_obj(obj));
> + }
> + };
> + }
> +}
> +
>  /* do some fini work before IP fini as dependence */  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev)  { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 2b6077762b91..7a35316baab0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -103,6 +103,8 @@ struct amdgpu_ras {
>   /* error handler data */
>   struct ras_err_handler_data *eh_data;
>   struct mutex recovery_lock;
> +
> + uint32_t flags;
>  };
> 
>  /* interfaces for IP */
> @@ -197,6 +199,7 @@ static inline int amdgpu_ras_reset_gpu(struct
> amdgpu_device *adev,
> 
>  /* called in ip_init and ip_fini */
>  int amdgpu_ras_init(struct amdgpu_device *adev);
> +void amdgpu_ras_post_init(struct amdgpu_device *adev);
>  int amdgpu_ras_fini(struct amdgpu_device *adev);  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev);
> 
> --
> 2.17.1


Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-03-12 Thread Michel Dänzer
On 2019-02-05 6:40 p.m., Michel Dänzer wrote:
> 
> FWIW, I've hit this twice now today, whereas I don't remember ever
> hitting it before (not 100% sure though).
> 
> I reverted the remaining hunk of the "cleanup setting bulk_movable"
> change, and it survived a piglit run. Could just be luck, though...

I'd been running with that revert for the last month without hitting the
problem. Today I tried without the revert, and promptly hit it again.
Seems more than just luck.


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer

Re: [PATCH 1/2] drm/amdgpu: let ras initialization a little noticeable

2019-03-12 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 

From: Pan, Xinhui
Sent: Tuesday, March 12, 2019 6:13 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Quan, Evan; Zhang, Hawking
Subject: [PATCH 1/2] drm/amdgpu: let ras initialization a little noticeable

add drm info output if ras initialized successfully.
add ras atomfirmware sanity check.

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1f47974b1184..10ce40d2c040 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1357,8 +1357,9 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 adev->asic_type != CHIP_VEGA20)
 return;

-   if (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
-   amdgpu_atomfirmware_sram_ecc_supported(adev))
+   if (adev->is_atom_fw &&
+   (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
+amdgpu_atomfirmware_sram_ecc_supported(adev)))
 *hw_supported = AMDGPU_RAS_BLOCK_MASK;

 *supported = amdgpu_ras_enable == 0 ?
@@ -1398,6 +1399,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 goto fs_out;

 amdgpu_ras_self_test(adev);
+
+   DRM_INFO("RAS INFO: ras initialized successfully, "
+   "hardware ability[%x] ras_mask[%x]\n",
+   con->hw_supported, con->supported);
 return 0;
 fs_out:
 amdgpu_ras_recovery_fini(adev);
--
2.17.1


Re: [PATCH 1/3] drm/amdgpu: remove non-sense NULL ptr check

2019-03-12 Thread Chunming Zhou
The series is Reviewed-by: Chunming Zhou 

On 2019/3/8 22:31, Christian König wrote:
> It's a bug having a dead pointer in the IDR, silently returning
> is the worst we can do.
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 --
>   1 file changed, 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 736ed1d67ec2..b7289f709644 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -570,12 +570,6 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr 
> *mgr)
>   
>   mutex_lock(&mgr->lock);
>   idr_for_each_entry(idp, ctx, id) {
> -
> - if (!ctx->adev) {
> - mutex_unlock(&mgr->lock);
> - return;
> - }
> -
>   for (i = 0; i < num_entities; i++) {
>   struct drm_sched_entity *entity;
>   
> @@ -596,10 +590,6 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr 
> *mgr)
>   idp = &mgr->ctx_handles;
>   
>   idr_for_each_entry(idp, ctx, id) {
> -
> - if (!ctx->adev)
> - return;
> -
>   if (kref_read(&ctx->refcount) != 1) {
>   DRM_ERROR("ctx %p is still alive\n", ctx);
>   continue;

Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

2019-03-12 Thread Christian König

Hi guys,

so I found a few minutes today to compile kfdtest.

The problem is that during the compile I get a lot of this:
CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In function
»BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
/usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
warning: undefined reference to »hsaKmtCreateQueue«


Any idea?

Christian.

On 11.03.19 at 17:55, Christian König wrote:

Hi guys,

well it's most likely some missing handling in the KFD, so I'm rather 
reluctant to revert the change immediately.


Problem is that I don't have time right now to look into it 
immediately. So Kent can you continue to take a look?


Sounds like it's crashing immediately, so it should be something obvious.

Christian.

On 11.03.19 at 10:49, Russell, Kent wrote:
 From what I've been able to dig through, the VM Fault seems to occur 
right after a doorbell mmap, but that's as far as I got. I can try to 
revert it in today's merge and see how things go.


  Kent


-Original Message-
From: Kuehling, Felix
Sent: Friday, March 08, 2019 11:16 PM
To: Koenig, Christian ; Russell, Kent
; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

My concerns were related to eviction fence handling. They would manifest
as unnecessary eviction callbacks into KFD that aren't caused by real
evictions. I addressed that with a previous patch series that removed the
need to remove eviction fences and add them back around page table updates
in amdgpu_amdkfd_gpuvm.c.

I don't know what's going on here. I can probably take a look on Monday.
I haven't considered what changed with respect to PD updates.

Kent, can we temporarily revert the offending change in amd-kfd-staging
just to unblock the merge?

Christian, I think KFD is currently broken on amd-staging-drm-next. If
we're serious about supporting KFD upstream, you may also want to consider
reverting your change there for now. Also consider building the Thunk and
kfdtest so you can do quick smoke tests locally whenever you make
amdgpu_vm changes that can affect KFD.
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface

Regards,
   Felix

-Original Message-
From: amd-gfx  On Behalf Of
Christian König
Sent: Friday, March 08, 2019 9:14 AM
To: Russell, Kent ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

My best guess is that we forget somewhere to update the PDs. What
hardware is that on?

Felix already mentioned that this could be problematic for the KFD.

Maybe he has an idea,
Christian.

On 08.03.19 at 15:04, Russell, Kent wrote:

Hi Christian,

This patch ended up causing a VM Fault in KFDTest. Reverting just this
patch addressed the issue:
[   82.703503] amdgpu :0c:00.0: GPU fault detected: 146 0x480c for
process  pid 0 thread  pid 0
[   82.703512] amdgpu :0c:00.0: VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x1000
[   82.703516] amdgpu :0c:00.0: VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
[   82.703522] amdgpu :0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at
page 4096, read from 'TC0' (0x54433000) (72)
[   82.703585] Evicting PASID 32769 queues

I am looking into it, but if you have any insight that would be great in
helping to resolve it quickly.

   Kent

-Original Message-
From: amd-gfx  On Behalf Of
Christian König
Sent: Tuesday, February 26, 2019 7:47 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

Let's start to allocate VM PDs/PTs on demand instead of
pre-allocating them during mapping.

Signed-off-by: Christian König 
Reviewed-by: Felix Kuehling 
---
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 136 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |   3 -
   5 files changed, 39 insertions(+), 129 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 31e3953dcb6e..088e9b6b765b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
*adev, struct kgd_mem *mem,
   if (p_bo_va_entry)
   *p_bo_va_entry = bo_va_entry;

-    /* Allocate new page tables if needed and validate
- * them.
- */
-    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
-    if (ret) {
-    pr_err("Failed to allocate pts, err=%d\n", ret);
-    goto err_alloc_pts;
-    }
-
+    /* Allocate validate page tables if needed */
   ret = vm_validate_pt_pd_bos(vm);
   if (ret) {
   pr_err("validate_pt_pd_bos() failed\n"); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

Re: [PATCH v2 5/5] drm: don't block fb changes for async plane updates

2019-03-12 Thread Kazlauskas, Nicholas
On 3/12/19 2:44 AM, Boris Brezillon wrote:
> On Mon, 11 Mar 2019 23:22:03 -0300
> Helen Koike  wrote:
> 
>> In the case of a normal sync update, the preparation of framebuffers (be
>> it calling drm_atomic_helper_prepare_planes() or doing setups with
>> drm_framebuffer_get()) is performed in the new_state and the respective
>> cleanups are performed in the old_state.
>>
>> In the case of async updates, the preparation is also done in the
>> new_state but the cleanups are done in the new_state (because updates
>> are performed in place, i.e. in the current state).
>>
>> The current code blocks async updates when the fb is changed, turning
>> async updates into sync updates, slowing down cursor updates and
>> introducing regressions in igt tests with errors of type:
>>
>> "CRITICAL: completed 97 cursor updated in a period of 30 flips, we
>> expect to complete approximately 15360 updates, with the threshold set
>> at 7680"
>>
>> Fb changes in async updates were prevented to avoid the following scenario:
>>
>> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, cleanup fb1
>> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb2
>> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2 
>> (wrong)
>> Where we have a single call to prepare fb2 but a double cleanup call to fb2.
>>
>> To solve the above problems, instead of blocking async fb changes, we
>> place the old framebuffer in the new_state object, so when the code
>> performs cleanups in the new_state it will cleanup the old_fb and we
>> will have the following scenario instead:
>>
>> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, no cleanup
>> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb1
>> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2
>>
>> Where calls to prepare/cleanup are balanced.
>>
>> Cc:  # v4.14+
>> Fixes: 25dc194b34dd ("drm: Block fb changes for async plane updates")
>> Suggested-by: Boris Brezillon 
>> Signed-off-by: Helen Koike 
> 
> Reviewed-by: Boris Brezillon 

Reviewed-by: Nicholas Kazlauskas 

I was thinking that the comment could go in async_commit or async_check, 
but I guess it works there too. Maybe it needs a FIXME or a TODO for a 
full state swap, but these are just nitpicks.

Nicholas Kazlauskas
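
For concreteness, the kind of marker being suggested might read like this (illustrative wording only, not part of the patch):

	/*
	 * Make sure the FBs have been swapped so that cleanups in the
	 * new_state performs a cleanup in the old FB.
	 *
	 * TODO/FIXME: once async updates do a full state swap instead of
	 * updating plane->state in place, this FB fixup and the WARNs
	 * above can be dropped.
	 */
	WARN_ON_ONCE(plane_state->fb != old_fb);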

> 
>>
>> ---
>> Hello,
>>
>> As mentioned in the cover letter, I tested in almost all platforms with
>> igt plane_cursor_legacy and kms_cursor_legacy and I didn't see any
>> regressions. But I couldn't test on MSM and AMD because I don't have
>> the hardware I would appreciate if anyone could help me testing those.
>>
>> Thanks!
>> Helen
>>
>> Changes in v2:
>> - Change the order of the patch in the series, add this as the last one.
>> - Add documentation
>> - s/ballanced/balanced
>>
>>   drivers/gpu/drm/drm_atomic_helper.c  | 20 ++--
>>   include/drm/drm_modeset_helper_vtables.h |  5 +
>>   2 files changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
>> b/drivers/gpu/drm/drm_atomic_helper.c
>> index 540a77a2ade9..e7eb96f1efc2 100644
>> --- a/drivers/gpu/drm/drm_atomic_helper.c
>> +++ b/drivers/gpu/drm/drm_atomic_helper.c
>> @@ -1608,15 +1608,6 @@ int drm_atomic_helper_async_check(struct drm_device 
>> *dev,
>>  old_plane_state->crtc != new_plane_state->crtc)
>>  return -EINVAL;
>>   
>> -/*
>> - * FIXME: Since prepare_fb and cleanup_fb are always called on
>> - * the new_plane_state for async updates we need to block framebuffer
>> - * changes. This prevents use of a fb that's been cleaned up and
>> - * double cleanups from occuring.
>> - */
>> -if (old_plane_state->fb != new_plane_state->fb)
>> -return -EINVAL;
>> -
>>  funcs = plane->helper_private;
>>  if (!funcs->atomic_async_update)
>>  return -EINVAL;
>> @@ -1657,6 +1648,9 @@ void drm_atomic_helper_async_commit(struct drm_device 
>> *dev,
>>  int i;
>>   
>>  for_each_new_plane_in_state(state, plane, plane_state, i) {
>> +struct drm_framebuffer *new_fb = plane_state->fb;
>> +struct drm_framebuffer *old_fb = plane->state->fb;
>> +
>>  funcs = plane->helper_private;
>>  funcs->atomic_async_update(plane, plane_state);
>>   
>> @@ -1665,11 +1659,17 @@ void drm_atomic_helper_async_commit(struct 
>> drm_device *dev,
>>   * plane->state in-place, make sure at least common
>>   * properties have been properly updated.
>>   */
>> -WARN_ON_ONCE(plane->state->fb != plane_state->fb);
>> +WARN_ON_ONCE(plane->state->fb != new_fb);
>>  WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x);
>>  WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y);
>>  WARN_ON_ONCE(plane->state->src_x != plane_state->src_x);
>>  WARN_ON_ONCE(plane->state->src_y != plane_state->src_y);
>> +
>> +/*

[PATCH 3/3] drm/amdgpu: use more entries for the first paging queue

2019-03-12 Thread Christian König
To aid recoverable page faults.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 335a0edf114b..8f5026c123ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -248,6 +248,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
 */
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
+   else if (ring == &adev->sdma.instance[0].page)
+   sched_hw_submission = 256;
 
if (ring->adev == NULL) {
if (adev->num_rings >= AMDGPU_MAX_RINGS)
-- 
2.17.1
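
For context, sched_hw_submission feeds directly into how large a ring buffer amdgpu_ring_init() allocates, roughly as follows (a simplified sketch of that relationship from memory of the function, not a verbatim quote):

	/* size the ring so that sched_hw_submission jobs of up to
	 * max_dw dwords each can be in flight at the same time */
	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);

so bumping the first paging queue to 256 entries gives the recoverable-page-fault path correspondingly more room for in-flight submissions.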


Re: [PATCH 1/3] drm/amdgpu: remove non-sense NULL ptr check

2019-03-12 Thread Christian König

Ping? Can anybody take a look?

Christian.

On 08.03.19 at 15:31, Christian König wrote:

It's a bug having a dead pointer in the IDR, silently returning
is the worst we can do.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 --
  1 file changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 736ed1d67ec2..b7289f709644 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -570,12 +570,6 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-
-   if (!ctx->adev) {
-   mutex_unlock(&mgr->lock);
-   return;
-   }
-
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity;
  
@@ -596,10 +590,6 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)

idp = &mgr->ctx_handles;
  
  	idr_for_each_entry(idp, ctx, id) {

-
-   if (!ctx->adev)
-   return;
-
if (kref_read(&ctx->refcount) != 1) {
DRM_ERROR("ctx %p is still alive\n", ctx);
continue;



RE: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags

2019-03-12 Thread Quan, Evan
Reviewed-by: Evan Quan 

> -Original Message-
> From: Pan, Xinhui
> Sent: 2019年3月12日 18:14
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Quan, Evan
> ; Zhang, Hawking 
> Subject: [PATCH 2/2] drm/amdgpu: add new ras workflow control flags
> 
> Add a ras post-init function to do some initialization after all IPs
> have finished their late init.
> 
> Add a new member, flags, which will control the ras workflow.
> For now, the vbios enables ras for us on boot. That might change in the
> future, so there should be a flag from the vbios telling us whether ras is
> enabled on boot. It looks like there is no such info now.
> 
> Other bits of the flags are reserved to control other parts of ras.
> 
> Signed-off-by: xinhui pan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 34
> +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  3 ++
>  3 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 29c44a2eabcf..95cd3b7886ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2707,6 +2707,9 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>   goto failed;
>   }
> 
> + /* must succeed. */
> + amdgpu_ras_post_init(adev);
> +
>   return 0;
> 
>  failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 10ce40d2c040..238b46c304cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -118,6 +118,11 @@ const char *ras_block_string[] = {  #define
> ras_err_str(i) (ras_error_string[ffs(i)])  #define ras_block_str(i)
> (ras_block_string[i])
> 
> +enum amdgpu_ras_flags {
> + AMDGPU_RAS_FLAG_INIT_BY_VBIOS = 1,
> +};
> +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> +
>  static void amdgpu_ras_self_test(struct amdgpu_device *adev)  {
>   /* TODO */
> @@ -1387,13 +1392,16 @@ int amdgpu_ras_init(struct amdgpu_device
> *adev)
>   &con->supported);
>   con->features = 0;
>   INIT_LIST_HEAD(&con->head);
> + /* Might need to get this flag from vbios. */
> + con->flags = RAS_DEFAULT_FLAGS;
> 
>   if (amdgpu_ras_recovery_init(adev))
>   goto recovery_out;
> 
>   amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
> 
> - amdgpu_ras_enable_all_features(adev, 1);
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
> + amdgpu_ras_enable_all_features(adev, 1);
> 
>   if (amdgpu_ras_fs_init(adev))
>   goto fs_out;
> @@ -1413,6 +1421,30 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>   return -EINVAL;
>  }
> 
> +/* do some init work after IP late init as dependence */ void
> +amdgpu_ras_post_init(struct amdgpu_device *adev) {
> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> + struct ras_manager *obj, *tmp;
> +
> + if (!con)
> + return;
> +
> + /* We enable ras on all hw_supported blocks, but a boot parameter
> +  * might disable some of them, and one or more IPs may not have
> +  * implemented it yet. So we disable those on their behalf.
> +  */
> + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
> + list_for_each_entry_safe(obj, tmp, &con->head, node) {
> + if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
> + amdgpu_ras_feature_enable(adev, &obj->head, 0);
> + /* there should not be any references left. */
> + WARN_ON(alive_obj(obj));
> + }
> + };
> + }
> +}
> +
>  /* do some fini work before IP fini as dependence */  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev)  { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 2b6077762b91..7a35316baab0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -103,6 +103,8 @@ struct amdgpu_ras {
>   /* error handler data */
>   struct ras_err_handler_data *eh_data;
>   struct mutex recovery_lock;
> +
> + uint32_t flags;
>  };
> 
>  /* interfaces for IP */
> @@ -197,6 +199,7 @@ static inline int amdgpu_ras_reset_gpu(struct
> amdgpu_device *adev,
> 
>  /* called in ip_init and ip_fini */
>  int amdgpu_ras_init(struct amdgpu_device *adev);
> +void amdgpu_ras_post_init(struct amdgpu_device *adev);
>  int amdgpu_ras_fini(struct amdgpu_device *adev);  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev);
> 
> --
> 2.17.1


[PATCH 2/2] drm/amdgpu: add new ras workflow control flags

2019-03-12 Thread Pan, Xinhui
Add a ras post-init function to do some initialization after all IPs
have finished their late init.

Add a new member, flags, which will control the ras workflow.
For now, the vbios enables ras for us on boot. That might change in the
future, so there should be a flag from the vbios telling us whether ras is
enabled on boot. It looks like there is no such info now.

Other bits of the flags are reserved to control other parts of ras.

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 34 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h|  3 ++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 29c44a2eabcf..95cd3b7886ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2707,6 +2707,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
+   /* must succeed. */
+   amdgpu_ras_post_init(adev);
+
return 0;
 
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 10ce40d2c040..238b46c304cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -118,6 +118,11 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
+enum amdgpu_ras_flags {
+   AMDGPU_RAS_FLAG_INIT_BY_VBIOS = 1,
+};
+#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+
 static void amdgpu_ras_self_test(struct amdgpu_device *adev)
 {
/* TODO */
@@ -1387,13 +1392,16 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
	&con->supported);
	con->features = 0;
	INIT_LIST_HEAD(&con->head);
+   /* Might need to get this flag from vbios. */
+   con->flags = RAS_DEFAULT_FLAGS;
 
if (amdgpu_ras_recovery_init(adev))
goto recovery_out;
 
amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
 
-   amdgpu_ras_enable_all_features(adev, 1);
+   if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+   amdgpu_ras_enable_all_features(adev, 1);
 
if (amdgpu_ras_fs_init(adev))
goto fs_out;
@@ -1413,6 +1421,30 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
return -EINVAL;
 }
 
+/* do some init work after IP late init as dependence */
+void amdgpu_ras_post_init(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   struct ras_manager *obj, *tmp;
+
+   if (!con)
+   return;
+
+   /* We enable ras on all hw_supported blocks, but a boot parameter might
+* disable some of them, and one or more IPs may not have implemented it
+* yet. So we disable those on their behalf.
+*/
+   if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
+   list_for_each_entry_safe(obj, tmp, &con->head, node) {
+   if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
+   amdgpu_ras_feature_enable(adev, &obj->head, 0);
+   /* there should not be any references left. */
+   WARN_ON(alive_obj(obj));
+   }
+   };
+   }
+}
+
 /* do some fini work before IP fini as dependence */
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 2b6077762b91..7a35316baab0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -103,6 +103,8 @@ struct amdgpu_ras {
/* error handler data */
struct ras_err_handler_data *eh_data;
struct mutex recovery_lock;
+
+   uint32_t flags;
 };
 
 /* interfaces for IP */
@@ -197,6 +199,7 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device 
*adev,
 
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
+void amdgpu_ras_post_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
 
-- 
2.17.1


[PATCH 1/2] drm/amdgpu: let ras initialization a little noticeable

2019-03-12 Thread Pan, Xinhui
add drm info output if ras initialized successfully.
add ras atomfirmware sanity check.

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1f47974b1184..10ce40d2c040 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1357,8 +1357,9 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
adev->asic_type != CHIP_VEGA20)
return;
 
-   if (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
-   amdgpu_atomfirmware_sram_ecc_supported(adev))
+   if (adev->is_atom_fw &&
+   (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
+amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
*supported = amdgpu_ras_enable == 0 ?
@@ -1398,6 +1399,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto fs_out;
 
amdgpu_ras_self_test(adev);
+
+   DRM_INFO("RAS INFO: ras initialized successfully, "
+   "hardware ability[%x] ras_mask[%x]\n",
+   con->hw_supported, con->supported);
return 0;
 fs_out:
amdgpu_ras_recovery_fini(adev);
-- 
2.17.1


Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Christian König

On 12.03.19 at 09:35, Michel Dänzer wrote:

On 2019-03-12 4:14 a.m., Harinath Reddy wrote:

Hi,

At present we are using the radeon HD driver from xorg to drive the AMD
FireMV 2250 card. We would like to replace the AMD FireMV 2250 with an AMD
Radeon Pro WX 2100 card.

I saw that the xf86-video-amdgpu xorg driver is available, but we are not
sure whether that would be sufficient or not; if not, what else is required
to use that card? If you can help, that would be great. I am new to the
graphics world.

We are using the HP-UX operating system, with Xserver (XFree86) as the
graphics server.

xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the latter
is working with your card, the former will work (as well as the generic
modesetting Xorg driver).


To answer the original question a bit more directly: the amdgpu kernel
driver is NOT available for HP-UX.


Regards,
Christian.

Re: will xf86-video-amdgpu is capable to drive AMD readeon Pro wx2100

2019-03-12 Thread Michel Dänzer
On 2019-03-12 4:14 a.m., Harinath Reddy wrote:
> Hi,
> 
> At present we are using the radeon HD driver from xorg to drive the AMD
> FireMV 2250 card. We would like to replace the AMD FireMV 2250 with an AMD
> Radeon Pro WX 2100 card.
> 
> I saw that the xf86-video-amdgpu xorg driver is available, but we are not
> sure whether that would be sufficient or not; if not, what else is required
> to use that card? If you can help, that would be great. I am new to the
> graphics world.
> 
> We are using the HP-UX operating system, with Xserver (XFree86) as the
> graphics server.

xf86-video-amdgpu requires the amdgpu kernel driver. Assuming the latter
is working with your card, the former will work (as well as the generic
modesetting Xorg driver).


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer

Re: [PATCH v6 1/2] drm/sched: Refactor ring mirror list handling.

2019-03-12 Thread Tomeu Vizoso
On Thu, 27 Dec 2018 at 20:28, Andrey Grodzovsky
 wrote:
>
> Decouple sched thread stop and start and ring mirror
> list handling from the policy of what to do about the
> guilty jobs.
> When stopping the sched thread and detaching sched fences
> from non-signaled HW fences, wait for all signaled HW fences
> to complete before rerunning the jobs.
>
> v2: Fix resubmission of guilty job into HW after refactoring.
>
> v4:
> Full restart for all the jobs, not only from guilty ring.
> Extract karma increase into standalone function.
>
> v5:
> Rework waiting for signaled jobs without relying on the job
> struct itself as those might already be freed for non 'guilty'
> job's schedulers.
> Expose karma increase to drivers.
>
> v6:
> Use list_for_each_entry_safe_continue and drm_sched_process_job
> in case fence already signaled.
> Call drm_sched_increase_karma only once for amdgpu and add documentation.
>
> Suggested-by: Christian Koenig 
> Signed-off-by: Andrey Grodzovsky 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  20 ++-
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c|  11 +-
>  drivers/gpu/drm/scheduler/sched_main.c | 195 
> +++--
>  drivers/gpu/drm/v3d/v3d_sched.c|  12 +-
>  include/drm/gpu_scheduler.h|   8 +-
>  5 files changed, 157 insertions(+), 89 deletions(-)
>
[snip]
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 445b2ef..f76d9ed 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -178,18 +178,22 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
> for (q = 0; q < V3D_MAX_QUEUES; q++) {
> struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
>
> -   kthread_park(sched->thread);
> -   drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
> +   drm_sched_stop(sched, (sched_job->sched == sched ?
>sched_job : NULL));
> +
> +   if(sched_job)
> +   drm_sched_increase_karma(sched_job);
> }
>
> /* get the GPU back into the init state */
> v3d_reset(v3d);
>
> +   for (q = 0; q < V3D_MAX_QUEUES; q++)
> +   drm_sched_resubmit_jobs(sched_job->sched);

Hi Andrey,

I'm not sure what the original intent was, but I guess it wasn't to
repeatedly call resubmit_jobs on that specific job's queue?

Regards,

Tomeu

Re: [PATCH v2 5/5] drm: don't block fb changes for async plane updates

2019-03-12 Thread Boris Brezillon
On Mon, 11 Mar 2019 23:22:03 -0300
Helen Koike  wrote:

> In the case of a normal sync update, the preparation of framebuffers (be
> it calling drm_atomic_helper_prepare_planes() or doing setups with
> drm_framebuffer_get()) is performed in the new_state and the respective
> cleanups are performed in the old_state.
> 
> In the case of async updates, the preparation is also done in the
> new_state but the cleanups are done in the new_state (because updates
> are performed in place, i.e. in the current state).
> 
> The current code blocks async updates when the fb is changed, turning
> async updates into sync updates, slowing down cursor updates and
> introducing regressions in igt tests with errors of type:
> 
> "CRITICAL: completed 97 cursor updated in a period of 30 flips, we
> expect to complete approximately 15360 updates, with the threshold set
> at 7680"
> 
> Fb changes in async updates were prevented to avoid the following scenario:
> 
> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, cleanup fb1
> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb2
> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2 (wrong)
> Where we have a single call to prepare fb2 but a double cleanup call to fb2.
> 
> To solve the above problems, instead of blocking async fb changes, we
> place the old framebuffer in the new_state object, so when the code
> performs cleanups in the new_state it will cleanup the old_fb and we
> will have the following scenario instead:
> 
> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, no cleanup
> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb1
> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2
> 
> Where calls to prepare/cleanup are balanced.
> 
> Cc:  # v4.14+
> Fixes: 25dc194b34dd ("drm: Block fb changes for async plane updates")
> Suggested-by: Boris Brezillon 
> Signed-off-by: Helen Koike 

Reviewed-by: Boris Brezillon 

> 
> ---
> Hello,
> 
> As mentioned in the cover letter, I tested in almost all platforms with
> igt plane_cursor_legacy and kms_cursor_legacy and I didn't see any
> regressions. But I couldn't test on MSM and AMD because I don't have
> the hardware I would appreciate if anyone could help me testing those.
> 
> Thanks!
> Helen
> 
> Changes in v2:
> - Change the order of the patch in the series, add this as the last one.
> - Add documentation
> - s/ballanced/balanced
> 
>  drivers/gpu/drm/drm_atomic_helper.c  | 20 ++--
>  include/drm/drm_modeset_helper_vtables.h |  5 +
>  2 files changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
> b/drivers/gpu/drm/drm_atomic_helper.c
> index 540a77a2ade9..e7eb96f1efc2 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -1608,15 +1608,6 @@ int drm_atomic_helper_async_check(struct drm_device 
> *dev,
>   old_plane_state->crtc != new_plane_state->crtc)
>   return -EINVAL;
>  
> - /*
> -  * FIXME: Since prepare_fb and cleanup_fb are always called on
> -  * the new_plane_state for async updates we need to block framebuffer
> -  * changes. This prevents use of a fb that's been cleaned up and
> -  * double cleanups from occuring.
> -  */
> - if (old_plane_state->fb != new_plane_state->fb)
> - return -EINVAL;
> -
>   funcs = plane->helper_private;
>   if (!funcs->atomic_async_update)
>   return -EINVAL;
> @@ -1657,6 +1648,9 @@ void drm_atomic_helper_async_commit(struct drm_device 
> *dev,
>   int i;
>  
>   for_each_new_plane_in_state(state, plane, plane_state, i) {
> + struct drm_framebuffer *new_fb = plane_state->fb;
> + struct drm_framebuffer *old_fb = plane->state->fb;
> +
>   funcs = plane->helper_private;
>   funcs->atomic_async_update(plane, plane_state);
>  
> @@ -1665,11 +1659,17 @@ void drm_atomic_helper_async_commit(struct drm_device 
> *dev,
>* plane->state in-place, make sure at least common
>* properties have been properly updated.
>*/
> - WARN_ON_ONCE(plane->state->fb != plane_state->fb);
> + WARN_ON_ONCE(plane->state->fb != new_fb);
>   WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x);
>   WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y);
>   WARN_ON_ONCE(plane->state->src_x != plane_state->src_x);
>   WARN_ON_ONCE(plane->state->src_y != plane_state->src_y);
> +
> + /*
> +  * Make sure the FBs have been swapped so that cleanups in the
> +  * new_state performs a cleanup in the old FB.
> +  */
> + WARN_ON_ONCE(plane_state->fb != old_fb);
>   }
>  }
>  EXPORT_SYMBOL(drm_atomic_helper_async_commit);
> diff --git a/include/drm/drm_modeset_helper_vtables.h 
>