Re: [PATCH] drm/amdgpu: fix and cleanup amdgpu_gem_object_close v2

2020-03-18 Thread Pan, Xinhui

Yes, adding the exclusive fence again as a shared one is more reliable.

From: Christian König 
Sent: Wednesday, March 18, 2020 4:03:14 PM
To: Pan, Xinhui 
Cc: amd-gfx@lists.freedesktop.org; Liu, Monk
Subject: Re: [PATCH] drm/amdgpu: fix and cleanup amdgpu_gem_object_close v2

The key point is that 10ms should be sufficient for either the move or
the update to finish.
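
As a side note on the return convention (an illustrative sketch, not part
of the patch; resv stands in for bo->tbo.base.resv):

        /*
         * dma_resv_wait_timeout_rcu() returns a negative error code on
         * failure, 0 if the wait timed out, or the remaining timeout in
         * jiffies on success; the patch therefore maps 0 to -ETIMEDOUT
         * before checking r.
         */
        long r = dma_resv_wait_timeout_rcu(resv, false /* wait_all */,
                                           false /* intr */,
                                           msecs_to_jiffies(10));
        if (r == 0)
                r = -ETIMEDOUT; /* exclusive fence not yet signaled */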

One alternative that came to mind would be to add the exclusive
fence as a shared one as well in this case.

This way we won't need to block at all.
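
A minimal sketch of that idea (the helper name is hypothetical, and it
assumes the BO's reservation object is already locked):

        /*
         * Hypothetical helper: re-add the current exclusive fence as a
         * shared fence, so a shared clear fence can be added afterwards
         * without waiting for the exclusive one.
         * Caller must hold bo->tbo.base.resv (see <linux/dma-resv.h>).
         */
        static int amdgpu_bo_readd_excl_as_shared(struct amdgpu_bo *bo)
        {
                struct dma_resv *resv = bo->tbo.base.resv;
                struct dma_fence *excl = dma_resv_get_excl(resv);
                int r;

                if (!excl)
                        return 0;

                /* Make sure a shared fence slot is available first. */
                r = dma_resv_reserve_shared(resv, 1);
                if (r)
                        return r;

                /* Takes its own reference on the fence. */
                dma_resv_add_shared_fence(resv, excl);
                return 0;
        }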

Christian.

On 18.03.20 at 09:00, Pan, Xinhui wrote:
> I wonder if it really fixes anything with such a small delay, but it
> should do no harm anyway.
>
> Reviewed-by: xinhui pan 
>
>> On Mar 18, 2020, at 15:51, Christian König wrote:
>>
>> Ping? Xinhui, can I get an rb for this?
>>
>> Thanks,
>> Christian.
>>
>> On 16.03.20 at 14:22, Christian König wrote:
>>> The problem is that we can't add the clear fence to the BO
>>> when there is an exclusive fence on it, since we can't
>>> guarantee that the clear fence will complete after the
>>> exclusive one.
>>>
>>> To fix this, refactor the function and wait for any potential
>>> exclusive fence with a small timeout before adding the
>>> shared clear fence.
>>>
>>> v2: fix warning
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 43 ++++++++++++++++++++++++++-----------------
>>>   1 file changed, 26 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index 5bec66e6b1f8..49c91dac35a0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -161,10 +161,11 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>>
>>>          struct amdgpu_bo_list_entry vm_pd;
>>>          struct list_head list, duplicates;
>>> +        struct dma_fence *fence = NULL;
>>>          struct ttm_validate_buffer tv;
>>>          struct ww_acquire_ctx ticket;
>>>          struct amdgpu_bo_va *bo_va;
>>> -        int r;
>>> +        long r;
>>>
>>>          INIT_LIST_HEAD(&list);
>>>          INIT_LIST_HEAD(&duplicates);
>>> @@ -178,28 +179,36 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>>          r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>          if (r) {
>>>                  dev_err(adev->dev, "leaking bo va because "
>>> -                        "we fail to reserve bo (%d)\n", r);
>>> +                        "we fail to reserve bo (%ld)\n", r);
>>>                  return;
>>>          }
>>>          bo_va = amdgpu_vm_bo_find(vm, bo);
>>> -        if (bo_va && --bo_va->ref_count == 0) {
>>> -                amdgpu_vm_bo_rmv(adev, bo_va);
>>> +        if (!bo_va || --bo_va->ref_count)
>>> +                goto out_unlock;
>>>
>>> -                if (amdgpu_vm_ready(vm)) {
>>> -                        struct dma_fence *fence = NULL;
>>> +        amdgpu_vm_bo_rmv(adev, bo_va);
>>> +        if (!amdgpu_vm_ready(vm))
>>> +                goto out_unlock;
>>>
>>> -                        r = amdgpu_vm_clear_freed(adev, vm, &fence);
>>> -                        if (unlikely(r)) {
>>> -                                dev_err(adev->dev, "failed to clear page "
>>> -                                        "tables on GEM object close (%d)\n", r);
>>> -                        }
>>>
>>> -                        if (fence) {
>>> -                                amdgpu_bo_fence(bo, fence, true);
>>> -                                dma_fence_put(fence);
>>> -                        }
>>> -                }
>>> -        }
>>> +        r = amdgpu_vm_clear_freed(adev, vm, &fence);
>>> +        if (r || !fence)
>>> +                goto out_unlock;
>>> +
>>> +        r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,
>>> +                                      msecs_to_jiffies(10));
>>> +        if (r == 0)
>>> +                r = -ETIMEDOUT;
>>> +        if (r)
>>> +                goto out_unlock;
>>> +
>>> +        amdgpu_bo_fence(bo, fence, true);
>>> +        dma_fence_put(fence);
>>> +
>>> +out_unlock:
>>> +        if (unlikely(r < 0))
>>> +                dev_err(adev->dev, "failed to clear page "
>>> +                        "tables on GEM object close (%ld)\n", r);
>>>          ttm_eu_backoff_reservation(&ticket, &list);
>>>  }
>>>


[PATCH] drm/amdgpu: fix and cleanup amdgpu_gem_object_close v2

2020-03-16 Thread Christian König
The problem is that we can't add the clear fence to the BO
when there is an exclusive fence on it, since we can't
guarantee that the clear fence will complete after the
exclusive one.

To fix this, refactor the function and wait for any potential
exclusive fence with a small timeout before adding the
shared clear fence.

v2: fix warning

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5bec66e6b1f8..49c91dac35a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -161,10 +161,11 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
         struct amdgpu_bo_list_entry vm_pd;
         struct list_head list, duplicates;
+        struct dma_fence *fence = NULL;
         struct ttm_validate_buffer tv;
         struct ww_acquire_ctx ticket;
         struct amdgpu_bo_va *bo_va;
-        int r;
+        long r;
 
         INIT_LIST_HEAD(&list);
         INIT_LIST_HEAD(&duplicates);
@@ -178,28 +179,36 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
         r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
         if (r) {
                 dev_err(adev->dev, "leaking bo va because "
-                        "we fail to reserve bo (%d)\n", r);
+                        "we fail to reserve bo (%ld)\n", r);
                 return;
         }
         bo_va = amdgpu_vm_bo_find(vm, bo);
-        if (bo_va && --bo_va->ref_count == 0) {
-                amdgpu_vm_bo_rmv(adev, bo_va);
+        if (!bo_va || --bo_va->ref_count)
+                goto out_unlock;
 
-                if (amdgpu_vm_ready(vm)) {
-                        struct dma_fence *fence = NULL;
+        amdgpu_vm_bo_rmv(adev, bo_va);
+        if (!amdgpu_vm_ready(vm))
+                goto out_unlock;
 
-                        r = amdgpu_vm_clear_freed(adev, vm, &fence);
-                        if (unlikely(r)) {
-                                dev_err(adev->dev, "failed to clear page "
-                                        "tables on GEM object close (%d)\n", r);
-                        }
 
-                        if (fence) {
-                                amdgpu_bo_fence(bo, fence, true);
-                                dma_fence_put(fence);
-                        }
-                }
-        }
+        r = amdgpu_vm_clear_freed(adev, vm, &fence);
+        if (r || !fence)
+                goto out_unlock;
+
+        r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,
+                                      msecs_to_jiffies(10));
+        if (r == 0)
+                r = -ETIMEDOUT;
+        if (r)
+                goto out_unlock;
+
+        amdgpu_bo_fence(bo, fence, true);
+        dma_fence_put(fence);
+
+out_unlock:
+        if (unlikely(r < 0))
+                dev_err(adev->dev, "failed to clear page "
+                        "tables on GEM object close (%ld)\n", r);
         ttm_eu_backoff_reservation(&ticket, &list);
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx