Re: 答复: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines
That is perfectly fine as long as you add the code using it in the same set of patches. What we should avoid is adding a lot of code and then not using it for quite some time, that gets certainly removed sooner or later. Regards, Christian. Am 16.01.2017 um 09:20 schrieb Liu, Monk: if ring_write_multiple is organized in a separate patch, doesn't it introduces an function that no client using it ?? fine by me although ... BR Monk *发件人:* Christian König <deathsim...@vodafone.de> *发送时间:* 2017年1月12日 20:27:48 *收件人:* Liu, Monk; amd-gfx@lists.freedesktop.org *主题:* Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines Am 12.01.2017 um 08:41 schrieb Monk Liu: > Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738 > Signed-off-by: Monk Liu <monk@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 31 ++ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++ > 2 files changed, 92 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index e9983fb..2039da7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) > ring->count_dw--; > } > > +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) > +{ > + unsigned occupied, chunk1, chunk2; > + void *dst; > + > + if (ring->count_dw < count_dw) > + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); > + else { Coding style says when either the "if" or the "else" branch uses "{" and "}" both should use them. I think even better would be to use a return statement in the "if", cause this is just checking the prerequisites for errors. Additional to that adding this function should be a separate patch. Christian. > + occupied = ring->wptr & ring->ptr_mask; > + dst = (void *)&ring->ring[occupied]; > + chunk1 = ring->ptr_mask + 1 - occupied; > + chunk1 = (chunk1 >= count_dw) ? 
count_dw: chunk1; > + chunk2 = count_dw - chunk1; > + chunk1 <<= 2; > + chunk2 <<= 2; > + if (chunk1) { > + memcpy(dst, src, chunk1); > + } > + > + if (chunk2) { > + src += chunk1; > + dst = (void *)ring->ring; > + memcpy(dst, src, chunk2); > + } > + > + ring->wptr += count_dw; > + ring->wptr &= ring->ptr_mask; > + ring->count_dw -= count_dw; > + } > +} > + > static inline struct amdgpu_sdma_instance * > amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 375784d..3e8cff3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); > static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); > static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); > static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); > +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); > > static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) > { > @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = >.rev = 0, >.funcs = _v8_0_ip_funcs, > }; > + > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) > +{ > + uint64_t ce_payload_addr; > + int cnt_ce; > + static union { > + struct amdgpu_ce_ib_state regular; > + struct amdgpu_ce_ib_state_chained_ib chained; > + } ce_payload = {0}; > + > + if (ring->adev->virt.chained_ib_support) { > + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload); > + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; > + } else { > + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload); > + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; > +
答复: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines
if ring_write_multiple is organized in a separate patch, doesn't it introduces an function that no client using it ?? fine by me although ... BR Monk 发件人: Christian König <deathsim...@vodafone.de> 发送时间: 2017年1月12日 20:27:48 收件人: Liu, Monk; amd-gfx@lists.freedesktop.org 主题: Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines Am 12.01.2017 um 08:41 schrieb Monk Liu: > Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738 > Signed-off-by: Monk Liu <monk@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 31 ++ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 > +++ > 2 files changed, 92 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index e9983fb..2039da7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct > amdgpu_ring *ring, uint32_t v) >ring->count_dw--; > } > > +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void > *src, int count_dw) > +{ > + unsigned occupied, chunk1, chunk2; > + void *dst; > + > + if (ring->count_dw < count_dw) > + DRM_ERROR("amdgpu: writing more dwords to the ring than > expected!\n"); > + else { Coding style says when either the "if" or the "else" branch uses "{" and "}" both should use them. I think even better would be to use a return statement in the "if", cause this is just checking the prerequisites for errors. Additional to that adding this function should be a separate patch. Christian. > + occupied = ring->wptr & ring->ptr_mask; > + dst = (void *)>ring[occupied]; > + chunk1 = ring->ptr_mask + 1 - occupied; > + chunk1 = (chunk1 >= count_dw) ? 
count_dw: chunk1; > + chunk2 = count_dw - chunk1; > + chunk1 <<= 2; > + chunk2 <<= 2; > + if (chunk1) { > + memcpy(dst, src, chunk1); > + } > + > + if (chunk2) { > + src += chunk1; > + dst = (void *)ring->ring; > + memcpy(dst, src, chunk2); > + } > + > + ring->wptr += count_dw; > + ring->wptr &= ring->ptr_mask; > + ring->count_dw -= count_dw; > + } > +} > + > static inline struct amdgpu_sdma_instance * > amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 375784d..3e8cff3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device > *adev); > static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); > static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); > static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, > uint64_t addr); > +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, > uint64_t addr); > > static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) > { > @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block > = >.rev = 0, >.funcs = _v8_0_ip_funcs, > }; > + > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, > uint64_t csa_addr) > +{ > + uint64_t ce_payload_addr; > + int cnt_ce; > + static union { > + struct amdgpu_ce_ib_state regular; > + struct amdgpu_ce_ib_state_chained_ib chained; > + } ce_payload = {0}; > + > + if (ring->adev->virt.chained_ib_support) { > + ce_payload_addr = csa_addr + offsetof(struct > amdgpu_gfx_meta_data_chained_ib, ce_payload); > + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; > + } else { > + ce_payload_addr = csa_addr + offsetof(struct > amdgpu_gfx_meta_data, ce_payload); > + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; > + } > + 
> + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); > + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | > + WRITE_DATA_DST_SEL(8) | > + WR_CONFIRM) | > + WRITE_DATA_CACHE_POLICY(0));
Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines
On Thu, Jan 12, 2017 at 2:41 AM, Monk Liu wrote: > Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 31 ++ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 > +++ > 2 files changed, 92 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index e9983fb..2039da7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct > amdgpu_ring *ring, uint32_t v) > ring->count_dw--; > } > > +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void > *src, int count_dw) > +{ > + unsigned occupied, chunk1, chunk2; > + void *dst; > + > + if (ring->count_dw < count_dw) > + DRM_ERROR("amdgpu: writing more dwords to the ring than > expected!\n"); > + else { Parens coding style as noted by Christian. Also, this behavior differs from amdgpu_ring_write() which completes the writes anyway. > + occupied = ring->wptr & ring->ptr_mask; > + dst = (void *)&ring->ring[occupied]; > + chunk1 = ring->ptr_mask + 1 - occupied; > + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; > + chunk2 = count_dw - chunk1; > + chunk1 <<= 2; > + chunk2 <<= 2; > + if (chunk1) { > + memcpy(dst, src, chunk1); > + } > + > + if (chunk2) { > + src += chunk1; > + dst = (void *)ring->ring; > + memcpy(dst, src, chunk2); > + } > + > + ring->wptr += count_dw; > + ring->wptr &= ring->ptr_mask; > + ring->count_dw -= count_dw; > + } > +} > + This hunk should be a separate patch. 
> static inline struct amdgpu_sdma_instance * > amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 375784d..3e8cff3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device > *adev); > static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); > static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); > static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, > uint64_t addr); > +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, > uint64_t addr); > > static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) > { > @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block > = > .rev = 0, > .funcs = _v8_0_ip_funcs, > }; > + > +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, > uint64_t csa_addr) > +{ > + uint64_t ce_payload_addr; > + int cnt_ce; > + static union { > + struct amdgpu_ce_ib_state regular; > + struct amdgpu_ce_ib_state_chained_ib chained; > + } ce_payload = {0}; > + > + if (ring->adev->virt.chained_ib_support) { > + ce_payload_addr = csa_addr + offsetof(struct > amdgpu_gfx_meta_data_chained_ib, ce_payload); > + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; > + } else { > + ce_payload_addr = csa_addr + offsetof(struct > amdgpu_gfx_meta_data, ce_payload); > + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; > + } > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); > + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | > + WRITE_DATA_DST_SEL(8) | > + WR_CONFIRM) | > + WRITE_DATA_CACHE_POLICY(0)); > + amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); > + amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); > + 
amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); > +} > + > +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, > uint64_t csa_addr) > +{ > + uint64_t de_payload_addr, gds_addr; > + int cnt_de; > + static union { > + struct amdgpu_de_ib_state regular; > + struct amdgpu_de_ib_state_chained_ib chained; > + } de_payload = {0}; > + > + gds_addr = csa_addr + 4096; > + if (ring->adev->virt.chained_ib_support) { > + de_payload.chained.gds_backup_addrlo = > lower_32_bits(gds_addr); > + de_payload.chained.gds_backup_addrhi = > upper_32_bits(gds_addr); > + de_payload_addr = csa_addr +
[PATCH 2/3] drm/amdgpu:implement META-DATA write routines
Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 31 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++ 2 files changed, 92 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e9983fb..2039da7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) ring->count_dw--; } +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) +{ + unsigned occupied, chunk1, chunk2; + void *dst; + + if (ring->count_dw < count_dw) + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + else { + occupied = ring->wptr & ring->ptr_mask; + dst = (void *)>ring[occupied]; + chunk1 = ring->ptr_mask + 1 - occupied; + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; + chunk2 = count_dw - chunk1; + chunk1 <<= 2; + chunk2 <<= 2; + if (chunk1) { + memcpy(dst, src, chunk1); + } + + if (chunk2) { + src += chunk1; + dst = (void *)ring->ring; + memcpy(dst, src, chunk2); + } + + ring->wptr += count_dw; + ring->wptr &= ring->ptr_mask; + ring->count_dw -= count_dw; + } +} + static inline struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 375784d..3e8cff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, 
uint64_t addr); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = .rev = 0, .funcs = _v8_0_ip_funcs, }; + +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +{ + uint64_t ce_payload_addr; + int cnt_ce; + static union { + struct amdgpu_ce_ib_state regular; + struct amdgpu_ce_ib_state_chained_ib chained; + } ce_payload = {0}; + + if (ring->adev->virt.chained_ib_support) { + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload); + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; + } else { + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload); + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); + amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); + amdgpu_ring_write_multiple(ring, (void *)_payload, cnt_ce - 2); +} + +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +{ + uint64_t de_payload_addr, gds_addr; + int cnt_de; + static union { + struct amdgpu_de_ib_state regular; + struct amdgpu_de_ib_state_chained_ib chained; + } de_payload = {0}; + + gds_addr = csa_addr + 4096; + if (ring->adev->virt.chained_ib_support) { + de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); + de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload); + cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; + } else { + de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); + de_payload_addr = csa_addr + 
offsetof(struct amdgpu_gfx_meta_data, de_payload); + cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; + } + +