Re: [RFC PATCH] drm/ttm: Do page counting after populate callback succeed

2021-06-15 Thread Christian König




Am 15.06.21 um 17:06 schrieb Felix Kuehling:

Am 2021-06-15 um 8:18 a.m. schrieb Christian König:

Am 15.06.21 um 14:11 schrieb Pan, Xinhui:

2021年6月15日 20:01,Christian König 
写道:

Am 15.06.21 um 13:57 schrieb xinhui pan:

Amdgpu set SG flag in populate callback. So TTM still count pages
in SG
BO.

It's probably better to fix this instead. E.g. why does amdgpu
modify the SG flag during populate and not during initial creation?
That doesn't seem to make sense.

fair enough. Let me have a try.
No idea why we set SG flag in populate years ago.

That's pretty recent IIRC. Felix moved the code around a bit to fix
another problem.

I moved some code from populate/unpopulate to backend_bind/unbind for
attaching and detaching dmabufs. I didn't change the setting/unsetting
of SG flags for userptr BOs. That goes back all the way to 2015.

As far as I can tell, this is needed because userptr BOs are not created
as SG BOs. That's something I've also pointed out before.


Ah, yes. That's because we need the pages array for userptr BOs.

Then we should probably move that to amdgpu_ttm_tt_set_userptr().

Thanks,
Christian.



Regards,
   Felix



Christian.


Christian.


One easy way to fix this is lets count pages after populate callback.

We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap
again and again even if swapout does not swap SG BOs at all.

Signed-off-by: xinhui pan 
---
   drivers/gpu/drm/ttm/ttm_tt.c | 32 +---
   1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c
b/drivers/gpu/drm/ttm/ttm_tt.c
index a1a25410ec74..4fa0a8cd71c0 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
   if (ttm_tt_is_populated(ttm))
   return 0;
   -    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-    atomic_long_add(ttm->num_pages, _pages_allocated);
-    if (bdev->pool.use_dma32)
-    atomic_long_add(ttm->num_pages,
-    _dma32_pages_allocated);
-    }
-
   while (atomic_long_read(_pages_allocated) >
ttm_pages_limit ||
  atomic_long_read(_dma32_pages_allocated) >
  ttm_dma32_pages_limit) {
@@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev,
   if (ret)
   goto error;
   +    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+    atomic_long_add(ttm->num_pages, _pages_allocated);
+    if (bdev->pool.use_dma32)
+    atomic_long_add(ttm->num_pages,
+    _dma32_pages_allocated);
+    }
+
   ttm_tt_add_mapping(bdev, ttm);
   ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
   if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
@@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
   return 0;
     error:
-    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-    atomic_long_sub(ttm->num_pages, _pages_allocated);
-    if (bdev->pool.use_dma32)
-    atomic_long_sub(ttm->num_pages,
-    _dma32_pages_allocated);
-    }
   return ret;
   }
   EXPORT_SYMBOL(ttm_tt_populate);
@@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device
*bdev, struct ttm_tt *ttm)
   if (!ttm_tt_is_populated(ttm))
   return;
   -    ttm_tt_clear_mapping(ttm);
-    if (bdev->funcs->ttm_tt_unpopulate)
-    bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
-    else
-    ttm_pool_free(>pool, ttm);
-
   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
   atomic_long_sub(ttm->num_pages, _pages_allocated);
   if (bdev->pool.use_dma32)
@@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device
*bdev, struct ttm_tt *ttm)
   _dma32_pages_allocated);
   }
   +    ttm_tt_clear_mapping(ttm);
+    if (bdev->funcs->ttm_tt_unpopulate)
+    bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
+    else
+    ttm_pool_free(>pool, ttm);
+
   ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
   }
   




Re: [RFC PATCH] drm/ttm: Do page counting after populate callback succeed

2021-06-15 Thread Felix Kuehling
Am 2021-06-15 um 8:18 a.m. schrieb Christian König:
> Am 15.06.21 um 14:11 schrieb Pan, Xinhui:
>>> 2021年6月15日 20:01,Christian König 
>>> 写道:
>>>
>>> Am 15.06.21 um 13:57 schrieb xinhui pan:
 Amdgpu set SG flag in populate callback. So TTM still count pages
 in SG
 BO.
>>> It's probably better to fix this instead. E.g. why does amdgpu
>>> modify the SG flag during populate and not during initial creation?
>>> That doesn't seem to make sense.
>> fair enough. Let me have a try.
>> No idea why we set SG flag in populate years ago.
>
> That's pretty recent IIRC. Felix moved the code around a bit to fix
> another problem.

I moved some code from populate/unpopulate to backend_bind/unbind for
attaching and detaching dmabufs. I didn't change the setting/unsetting
of SG flags for userptr BOs. That goes back all the way to 2015.

As far as I can tell, this is needed because userptr BOs are not created
as SG BOs. That's something I've also pointed out before.

Regards,
  Felix


>
> Christian.
>
>>
>>> Christian.
>>>
 One easy way to fix this is lets count pages after populate callback.

 We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap
 again and again even if swapout does not swap SG BOs at all.

 Signed-off-by: xinhui pan 
 ---
   drivers/gpu/drm/ttm/ttm_tt.c | 32 +---
   1 file changed, 13 insertions(+), 19 deletions(-)

 diff --git a/drivers/gpu/drm/ttm/ttm_tt.c
 b/drivers/gpu/drm/ttm/ttm_tt.c
 index a1a25410ec74..4fa0a8cd71c0 100644
 --- a/drivers/gpu/drm/ttm/ttm_tt.c
 +++ b/drivers/gpu/drm/ttm/ttm_tt.c
 @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
   if (ttm_tt_is_populated(ttm))
   return 0;
   -    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
 -    atomic_long_add(ttm->num_pages, _pages_allocated);
 -    if (bdev->pool.use_dma32)
 -    atomic_long_add(ttm->num_pages,
 -    _dma32_pages_allocated);
 -    }
 -
   while (atomic_long_read(_pages_allocated) >
 ttm_pages_limit ||
  atomic_long_read(_dma32_pages_allocated) >
  ttm_dma32_pages_limit) {
 @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev,
   if (ret)
   goto error;
   +    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
 +    atomic_long_add(ttm->num_pages, _pages_allocated);
 +    if (bdev->pool.use_dma32)
 +    atomic_long_add(ttm->num_pages,
 +    _dma32_pages_allocated);
 +    }
 +
   ttm_tt_add_mapping(bdev, ttm);
   ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
   if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
 @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
   return 0;
     error:
 -    if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
 -    atomic_long_sub(ttm->num_pages, _pages_allocated);
 -    if (bdev->pool.use_dma32)
 -    atomic_long_sub(ttm->num_pages,
 -    _dma32_pages_allocated);
 -    }
   return ret;
   }
   EXPORT_SYMBOL(ttm_tt_populate);
 @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device
 *bdev, struct ttm_tt *ttm)
   if (!ttm_tt_is_populated(ttm))
   return;
   -    ttm_tt_clear_mapping(ttm);
 -    if (bdev->funcs->ttm_tt_unpopulate)
 -    bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
 -    else
 -    ttm_pool_free(>pool, ttm);
 -
   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
   atomic_long_sub(ttm->num_pages, _pages_allocated);
   if (bdev->pool.use_dma32)
 @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device
 *bdev, struct ttm_tt *ttm)
   _dma32_pages_allocated);
   }
   +    ttm_tt_clear_mapping(ttm);
 +    if (bdev->funcs->ttm_tt_unpopulate)
 +    bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
 +    else
 +    ttm_pool_free(>pool, ttm);
 +
   ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
   }
   
>


Re: [RFC PATCH] drm/ttm: Do page counting after populate callback succeed

2021-06-15 Thread Christian König

Am 15.06.21 um 14:11 schrieb Pan, Xinhui:

2021年6月15日 20:01,Christian König  写道:

Am 15.06.21 um 13:57 schrieb xinhui pan:

Amdgpu set SG flag in populate callback. So TTM still count pages in SG
BO.

It's probably better to fix this instead. E.g. why does amdgpu modify the SG 
flag during populate and not during initial creation? That doesn't seem to make 
sense.

fair enough. Let me have a try.
No idea why we set SG flag in populate years ago.


That's pretty recent IIRC. Felix moved the code around a bit to fix 
another problem.


Christian.




Christian.


One easy way to fix this is lets count pages after populate callback.

We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap
again and again even if swapout does not swap SG BOs at all.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 32 +---
  1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index a1a25410ec74..4fa0a8cd71c0 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
  - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-   atomic_long_add(ttm->num_pages, _pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_add(ttm->num_pages,
-   _dma32_pages_allocated);
-   }
-
while (atomic_long_read(_pages_allocated) > ttm_pages_limit ||
   atomic_long_read(_dma32_pages_allocated) >
   ttm_dma32_pages_limit) {
@@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ret)
goto error;
  + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   atomic_long_add(ttm->num_pages, _pages_allocated);
+   if (bdev->pool.use_dma32)
+   atomic_long_add(ttm->num_pages,
+   _dma32_pages_allocated);
+   }
+
ttm_tt_add_mapping(bdev, ttm);
ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
@@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
return 0;
error:
-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-   atomic_long_sub(ttm->num_pages, _pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_sub(ttm->num_pages,
-   _dma32_pages_allocated);
-   }
return ret;
  }
  EXPORT_SYMBOL(ttm_tt_populate);
@@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
if (!ttm_tt_is_populated(ttm))
return;
  - ttm_tt_clear_mapping(ttm);
-   if (bdev->funcs->ttm_tt_unpopulate)
-   bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
-   else
-   ttm_pool_free(>pool, ttm);
-
if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
atomic_long_sub(ttm->num_pages, _pages_allocated);
if (bdev->pool.use_dma32)
@@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
_dma32_pages_allocated);
}
  + ttm_tt_clear_mapping(ttm);
+   if (bdev->funcs->ttm_tt_unpopulate)
+   bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
+   else
+   ttm_pool_free(>pool, ttm);
+
ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
  }
  




Re: [RFC PATCH] drm/ttm: Do page counting after populate callback succeed

2021-06-15 Thread Pan, Xinhui


> 2021年6月15日 20:01,Christian König  写道:
> 
> Am 15.06.21 um 13:57 schrieb xinhui pan:
>> Amdgpu set SG flag in populate callback. So TTM still count pages in SG
>> BO.
> 
> It's probably better to fix this instead. E.g. why does amdgpu modify the SG 
> flag during populate and not during initial creation? That doesn't seem to 
> make sense.

fair enough. Let me have a try.
No idea why we set SG flag in populate years ago.

> 
> Christian.
> 
>> One easy way to fix this is lets count pages after populate callback.
>> 
>> We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap
>> again and again even if swapout does not swap SG BOs at all.
>> 
>> Signed-off-by: xinhui pan 
>> ---
>>  drivers/gpu/drm/ttm/ttm_tt.c | 32 +---
>>  1 file changed, 13 insertions(+), 19 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> index a1a25410ec74..4fa0a8cd71c0 100644
>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>  if (ttm_tt_is_populated(ttm))
>>  return 0;
>>  -   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>> -atomic_long_add(ttm->num_pages, _pages_allocated);
>> -if (bdev->pool.use_dma32)
>> -atomic_long_add(ttm->num_pages,
>> -_dma32_pages_allocated);
>> -}
>> -
>>  while (atomic_long_read(_pages_allocated) > ttm_pages_limit ||
>> atomic_long_read(_dma32_pages_allocated) >
>> ttm_dma32_pages_limit) {
>> @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>  if (ret)
>>  goto error;
>>  +   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>> +atomic_long_add(ttm->num_pages, _pages_allocated);
>> +if (bdev->pool.use_dma32)
>> +atomic_long_add(ttm->num_pages,
>> +_dma32_pages_allocated);
>> +}
>> +
>>  ttm_tt_add_mapping(bdev, ttm);
>>  ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
>>  if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
>> @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>  return 0;
>>error:
>> -if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>> -atomic_long_sub(ttm->num_pages, _pages_allocated);
>> -if (bdev->pool.use_dma32)
>> -atomic_long_sub(ttm->num_pages,
>> -_dma32_pages_allocated);
>> -}
>>  return ret;
>>  }
>>  EXPORT_SYMBOL(ttm_tt_populate);
>> @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
>> ttm_tt *ttm)
>>  if (!ttm_tt_is_populated(ttm))
>>  return;
>>  -   ttm_tt_clear_mapping(ttm);
>> -if (bdev->funcs->ttm_tt_unpopulate)
>> -bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
>> -else
>> -ttm_pool_free(>pool, ttm);
>> -
>>  if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>  atomic_long_sub(ttm->num_pages, _pages_allocated);
>>  if (bdev->pool.use_dma32)
>> @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
>> ttm_tt *ttm)
>>  _dma32_pages_allocated);
>>  }
>>  +   ttm_tt_clear_mapping(ttm);
>> +if (bdev->funcs->ttm_tt_unpopulate)
>> +bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
>> +else
>> +ttm_pool_free(>pool, ttm);
>> +
>>  ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>  }
>>  
> 



Re: [RFC PATCH] drm/ttm: Do page counting after populate callback succeed

2021-06-15 Thread Christian König

Am 15.06.21 um 13:57 schrieb xinhui pan:

Amdgpu set SG flag in populate callback. So TTM still count pages in SG
BO.


It's probably better to fix this instead. E.g. why does amdgpu modify 
the SG flag during populate and not during initial creation? That 
doesn't seem to make sense.


Christian.


One easy way to fix this is lets count pages after populate callback.

We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap
again and again even if swapout does not swap SG BOs at all.

Signed-off-by: xinhui pan 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 32 +---
  1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index a1a25410ec74..4fa0a8cd71c0 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
  
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {

-   atomic_long_add(ttm->num_pages, _pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_add(ttm->num_pages,
-   _dma32_pages_allocated);
-   }
-
while (atomic_long_read(_pages_allocated) > ttm_pages_limit ||
   atomic_long_read(_dma32_pages_allocated) >
   ttm_dma32_pages_limit) {
@@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ret)
goto error;
  
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {

+   atomic_long_add(ttm->num_pages, _pages_allocated);
+   if (bdev->pool.use_dma32)
+   atomic_long_add(ttm->num_pages,
+   _dma32_pages_allocated);
+   }
+
ttm_tt_add_mapping(bdev, ttm);
ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
@@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
return 0;
  
  error:

-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-   atomic_long_sub(ttm->num_pages, _pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_sub(ttm->num_pages,
-   _dma32_pages_allocated);
-   }
return ret;
  }
  EXPORT_SYMBOL(ttm_tt_populate);
@@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
if (!ttm_tt_is_populated(ttm))
return;
  
-	ttm_tt_clear_mapping(ttm);

-   if (bdev->funcs->ttm_tt_unpopulate)
-   bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
-   else
-   ttm_pool_free(>pool, ttm);
-
if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
atomic_long_sub(ttm->num_pages, _pages_allocated);
if (bdev->pool.use_dma32)
@@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
_dma32_pages_allocated);
}
  
+	ttm_tt_clear_mapping(ttm);

+   if (bdev->funcs->ttm_tt_unpopulate)
+   bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
+   else
+   ttm_pool_free(>pool, ttm);
+
ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
  }