Avoid constant register reloads while emitting IBs by using a local write
pointer and only updating the size at the end of each helper.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 108 +++++++++++++++----------
 1 file changed, 65 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 1c076bd1cf73..01b95d1c7419 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -927,16 +927,19 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
+       u32 *ptr = &ib->ptr[ib->length_dw];
        unsigned bytes = count * 8;
 
-       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-       ib->ptr[ib->length_dw++] = bytes;
-       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-       ib->ptr[ib->length_dw++] = lower_32_bits(src);
-       ib->ptr[ib->length_dw++] = upper_32_bits(src);
-       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       *ptr++ = bytes;
+       *ptr++ = 0; /* src/dst endian swap */
+       *ptr++ = lower_32_bits(src);
+       *ptr++ = upper_32_bits(src);
+       *ptr++ = lower_32_bits(pe);
+       *ptr++ = upper_32_bits(pe);
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 /**
@@ -954,18 +957,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, 
uint64_t pe,
                                   uint64_t value, unsigned count,
                                   uint32_t incr)
 {
+       u32 *ptr = &ib->ptr[ib->length_dw];
        unsigned ndw = count * 2;
 
-       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
-       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-       ib->ptr[ib->length_dw++] = ndw;
+       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+       *ptr++ = lower_32_bits(pe);
+       *ptr++ = upper_32_bits(pe);
+       *ptr++ = ndw;
        for (; ndw > 0; ndw -= 2) {
-               ib->ptr[ib->length_dw++] = lower_32_bits(value);
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+               *ptr++ = lower_32_bits(value);
+               *ptr++ = upper_32_bits(value);
                value += incr;
        }
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 /**
@@ -984,17 +990,21 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib 
*ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint64_t flags)
 {
+       u32 *ptr = &ib->ptr[ib->length_dw];
+
        /* for physically contiguous pages (vram) */
-       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
-       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-       ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
-       ib->ptr[ib->length_dw++] = upper_32_bits(flags);
-       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
-       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
-       ib->ptr[ib->length_dw++] = incr; /* increment size */
-       ib->ptr[ib->length_dw++] = 0;
-       ib->ptr[ib->length_dw++] = count; /* number of entries */
+       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       *ptr++ = lower_32_bits(pe); /* dst addr */
+       *ptr++ = upper_32_bits(pe);
+       *ptr++ = lower_32_bits(flags); /* mask */
+       *ptr++ = upper_32_bits(flags);
+       *ptr++ = lower_32_bits(addr); /* value */
+       *ptr++ = upper_32_bits(addr);
+       *ptr++ = incr; /* increment size */
+       *ptr++ = 0;
+       *ptr++ = count; /* number of entries */
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 /**
@@ -1007,18 +1017,22 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib 
*ib, uint64_t pe,
 static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib 
*ib)
 {
        struct amdgpu_sdma_instance *sdma = 
amdgpu_sdma_get_instance_from_ring(ring);
+       u32 *ptr = &ib->ptr[ib->length_dw];
        u32 pad_count;
        int i;
 
        pad_count = (-ib->length_dw) & 7;
+       if (!pad_count)
+               return;
+
        for (i = 0; i < pad_count; i++)
                if (sdma && sdma->burst_nop && (i == 0))
-                       ib->ptr[ib->length_dw++] =
-                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
-                               SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+                       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+                                SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
                else
-                       ib->ptr[ib->length_dw++] =
-                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+                       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 /**
@@ -1626,14 +1640,18 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib 
*ib,
                                       uint32_t byte_count,
                                       uint32_t copy_flags)
 {
-       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-       ib->ptr[ib->length_dw++] = byte_count;
-       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-       ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
-       ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
-       ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
-       ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+       u32 *ptr = &ib->ptr[ib->length_dw];
+
+       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       *ptr++ = byte_count;
+       *ptr++ = 0; /* src/dst endian swap */
+       *ptr++ = lower_32_bits(src_offset);
+       *ptr++ = upper_32_bits(src_offset);
+       *ptr++ = lower_32_bits(dst_offset);
+       *ptr++ = upper_32_bits(dst_offset);
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 /**
@@ -1651,11 +1669,15 @@ static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib 
*ib,
                                       uint64_t dst_offset,
                                       uint32_t byte_count)
 {
-       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
-       ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
-       ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
-       ib->ptr[ib->length_dw++] = src_data;
-       ib->ptr[ib->length_dw++] = byte_count;
+       u32 *ptr = &ib->ptr[ib->length_dw];
+
+       *ptr++ = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+       *ptr++ = lower_32_bits(dst_offset);
+       *ptr++ = upper_32_bits(dst_offset);
+       *ptr++ = src_data;
+       *ptr++ = byte_count;
+
+       ib->length_dw = ptr - ib->ptr;
 }
 
 static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
-- 
2.48.0

Reply via email to