Re: [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()

2021-07-16 Thread Christian König




Am 16.07.21 um 05:10 schrieb Kevin Wang:

1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.

Signed-off-by: Kevin Wang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 91 +++--
  1 file changed, 54 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f4ff3c9350b3..62ea5089b4f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,56 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
  }
  
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,

+ void *buf, size_t size, bool write)
+{
+   while (size) {
+   uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+   uint64_t bytes = 4 - (pos & 0x3);
+   uint32_t shift = (pos & 0x3) * 8;
+   uint32_t mask = 0xffffffff << shift;
+   uint32_t value = 0;
+
+   if (size < bytes) {
+   mask &= 0xffffffff >> (bytes - size) * 8;
+   bytes = size;
+   }
+
+   if (mask != 0xffffffff) {
+   amdgpu_device_mm_access(adev, aligned_pos, &value, 4, 
false);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   amdgpu_device_mm_access(adev, aligned_pos, 
&value, 4, true);
+   } else {
+   value = (value & mask) >> shift;
+   memcpy(buf, &value, bytes);
+   }
+   } else {
+   amdgpu_device_mm_access(adev, aligned_pos, buf, 4, 
write);
+   }
+
+   pos += bytes;
+   buf += bytes;
+   size -= bytes;
+   }
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+  void *buf, size_t size, bool write)
+{
+   size_t count;
+
+   count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+   size -= count;
+   if (size) {
+   /* using MM to access rest vram and handle un-aligned address */
+   pos += count;
+   buf += count;
+   amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+   }
+}


Just inline that function into the caller, don't really see the need to 
have that separated.


Apart from that looks good to me.

Regards,
Christian.


+
  /**
   * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
   *
@@ -1426,8 +1476,6 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct amdgpu_res_cursor cursor;
-   unsigned long flags;
-   uint32_t value = 0;
int ret = 0;
  
  	if (bo->mem.mem_type != TTM_PL_VRAM)

@@ -1435,41 +1483,10 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
  
  	amdgpu_res_first(&bo->mem, offset, len, &cursor);

while (cursor.remaining) {
-   uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
-   uint64_t bytes = 4 - (cursor.start & 3);
-   uint32_t shift = (cursor.start & 3) * 8;
-   uint32_t mask = 0xffffffff << shift;
-
-   if (cursor.size < bytes) {
-   mask &= 0xffffffff >> (bytes - cursor.size) * 8;
-   bytes = cursor.size;
-   }
-
-   if (mask != 0xffffffff) {
-   spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x80000000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, &value, bytes);
-   }
-   } else {
-   bytes = cursor.size & ~0x3ULL;
-   amdgpu_device_vram_access(adev, cursor.start,
- (uint32_t *)buf, bytes,
- write);
-  

[RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()

2021-07-16 Thread Kevin Wang
1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.

Signed-off-by: Kevin Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 91 +++--
 1 file changed, 54 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f4ff3c9350b3..62ea5089b4f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,56 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
 }
 
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+   while (size) {
+   uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+   uint64_t bytes = 4 - (pos & 0x3);
+   uint32_t shift = (pos & 0x3) * 8;
+   uint32_t mask = 0xffffffff << shift;
+   uint32_t value = 0;
+
+   if (size < bytes) {
+   mask &= 0xffffffff >> (bytes - size) * 8;
+   bytes = size;
+   }
+
+   if (mask != 0xffffffff) {
+   amdgpu_device_mm_access(adev, aligned_pos, &value, 4, 
false);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   amdgpu_device_mm_access(adev, aligned_pos, 
&value, 4, true);
+   } else {
+   value = (value & mask) >> shift;
+   memcpy(buf, &value, bytes);
+   }
+   } else {
+   amdgpu_device_mm_access(adev, aligned_pos, buf, 4, 
write);
+   }
+
+   pos += bytes;
+   buf += bytes;
+   size -= bytes;
+   }
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+  void *buf, size_t size, bool write)
+{
+   size_t count;
+
+   count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+   size -= count;
+   if (size) {
+   /* using MM to access rest vram and handle un-aligned address */
+   pos += count;
+   buf += count;
+   amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+   }
+}
+
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1426,8 +1476,6 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct amdgpu_res_cursor cursor;
-   unsigned long flags;
-   uint32_t value = 0;
int ret = 0;
 
if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1435,41 +1483,10 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
 
amdgpu_res_first(&bo->mem, offset, len, &cursor);
while (cursor.remaining) {
-   uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
-   uint64_t bytes = 4 - (cursor.start & 3);
-   uint32_t shift = (cursor.start & 3) * 8;
-   uint32_t mask = 0xffffffff << shift;
-
-   if (cursor.size < bytes) {
-   mask &= 0xffffffff >> (bytes - cursor.size) * 8;
-   bytes = cursor.size;
-   }
-
-   if (mask != 0xffffffff) {
-   spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x80000000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, &value, bytes);
-   }
-   } else {
-   bytes = cursor.size & ~0x3ULL;
-   amdgpu_device_vram_access(adev, cursor.start,
- (uint32_t *)buf, bytes,
- write);
-   }
-
-   ret += bytes;
-   buf = (uint8_t *)buf + bytes;
-   amdgpu_res_next(&cursor, bytes);
+   amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.si