Re: Reply: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines

2017-01-16 Thread Christian König
That is perfectly fine as long as you add the code using it in the same set of patches.

What we should avoid is adding a lot of code and then not using it for quite some time; that would certainly get removed sooner or later.


Regards,
Christian.

On 16.01.2017 at 09:20, Liu, Monk wrote:


If ring_write_multiple is organized in a separate patch, doesn't it introduce a function that no client is using?



fine by me although ...


BR Monk


*From:* Christian König <deathsim...@vodafone.de>
*Sent:* January 12, 2017 20:27:48
*To:* Liu, Monk; amd-gfx@lists.freedesktop.org
*Subject:* Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines
On 12.01.2017 at 08:41, Monk Liu wrote:
> Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738
> Signed-off-by: Monk Liu <monk@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 31 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++
>   2 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e9983fb..2039da7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
>   ring->count_dw--;
>   }
>
> +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw)
> +{
> + unsigned occupied, chunk1, chunk2;
> + void *dst;
> +
> + if (ring->count_dw < count_dw)
> + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
> + else {

Coding style says that when either the "if" or the "else" branch uses "{" and
"}", both should use them.

I think it would be even better to use a return statement in the "if",
because this is just checking the prerequisites for errors.

In addition to that, adding this function should be a separate patch.

Christian.
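
Purely for illustration, a minimal sketch of the early-return shape being suggested here (an assumed restructuring of the hunk above, not the actual committed code):

    static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
                                                  void *src, int count_dw)
    {
            unsigned occupied, chunk1, chunk2;
            void *dst;

            /* bail out early instead of wrapping the whole body in "else" */
            if (ring->count_dw < count_dw) {
                    DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
                    return;
            }

            occupied = ring->wptr & ring->ptr_mask;
            dst = (void *)&ring->ring[occupied];
            chunk1 = ring->ptr_mask + 1 - occupied;   /* dwords until the ring end */
            chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
            chunk2 = count_dw - chunk1;               /* dwords that wrap around   */
            chunk1 <<= 2;                             /* dwords -> bytes           */
            chunk2 <<= 2;

            if (chunk1)
                    memcpy(dst, src, chunk1);

            if (chunk2) {
                    src += chunk1;
                    dst = (void *)ring->ring;
                    memcpy(dst, src, chunk2);
            }

            ring->wptr += count_dw;
            ring->wptr &= ring->ptr_mask;
            ring->count_dw -= count_dw;
    }

The callers in gfx_v8_0.c would use it exactly as in patch 2/3: emit the WRITE_DATA header and address dwords with amdgpu_ring_write(), then push the payload structure with amdgpu_ring_write_multiple().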

> + occupied = ring->wptr & ring->ptr_mask;
> + dst = (void *)&ring->ring[occupied];
> + chunk1 = ring->ptr_mask + 1 - occupied;
> + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
> + chunk2 = count_dw - chunk1;
> + chunk1 <<= 2;
> + chunk2 <<= 2;
> + if (chunk1) {
> + memcpy(dst, src, chunk1);
> + }
> +
> + if (chunk2) {
> + src += chunk1;
> + dst = (void *)ring->ring;
> + memcpy(dst, src, chunk2);
> + }
> +
> + ring->wptr += count_dw;
> + ring->wptr &= ring->ptr_mask;
> + ring->count_dw -= count_dw;
> + }
> +}
> +
>   static inline struct amdgpu_sdma_instance *
>   amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 375784d..3e8cff3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
>   static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
>   static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
>   static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
> +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
>
>   static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
>   {
> @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
>   .rev = 0,
>   .funcs = &gfx_v8_0_ip_funcs,
>   };
> +
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
> +{
> + uint64_t ce_payload_addr;
> + int cnt_ce;
> + static union {
> + struct amdgpu_ce_ib_state regular;
> + struct amdgpu_ce_ib_state_chained_ib chained;
> + } ce_payload = {0};
> +
> + if (ring->adev->virt.chained_ib_support) {
> + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
> + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
> + } else {
> + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
> + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
> +

Reply: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines

2017-01-16 Thread Liu, Monk
If ring_write_multiple is organized in a separate patch, doesn't it introduce a function that no client is using?


fine by me although ...


BR Monk


From: Christian König <deathsim...@vodafone.de>
Sent: January 12, 2017 20:27:48
To: Liu, Monk; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines

On 12.01.2017 at 08:41, Monk Liu wrote:
> Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738
> Signed-off-by: Monk Liu <monk@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 31 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++
>   2 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e9983fb..2039da7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
>ring->count_dw--;
>   }
>
> +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw)
> +{
> + unsigned occupied, chunk1, chunk2;
> + void *dst;
> +
> + if (ring->count_dw < count_dw)
> + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
> + else {

Coding style says that when either the "if" or the "else" branch uses "{" and
"}", both should use them.

I think it would be even better to use a return statement in the "if",
because this is just checking the prerequisites for errors.

In addition to that, adding this function should be a separate patch.

Christian.

> + occupied = ring->wptr & ring->ptr_mask;
> + dst = (void *)&ring->ring[occupied];
> + chunk1 = ring->ptr_mask + 1 - occupied;
> + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
> + chunk2 = count_dw - chunk1;
> + chunk1 <<= 2;
> + chunk2 <<= 2;
> + if (chunk1) {
> + memcpy(dst, src, chunk1);
> + }
> +
> + if (chunk2) {
> + src += chunk1;
> + dst = (void *)ring->ring;
> + memcpy(dst, src, chunk2);
> + }
> +
> + ring->wptr += count_dw;
> + ring->wptr &= ring->ptr_mask;
> + ring->count_dw -= count_dw;
> + }
> +}
> +
>   static inline struct amdgpu_sdma_instance *
>   amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 375784d..3e8cff3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
>   static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
>   static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
>   static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
> +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
>
>   static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
>   {
> @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
>   .rev = 0,
>   .funcs = &gfx_v8_0_ip_funcs,
>   };
> +
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
> +{
> + uint64_t ce_payload_addr;
> + int cnt_ce;
> + static union {
> + struct amdgpu_ce_ib_state regular;
> + struct amdgpu_ce_ib_state_chained_ib chained;
> + } ce_payload = {0};
> +
> + if (ring->adev->virt.chained_ib_support) {
> + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
> + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
> + } else {
> + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
> + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
> + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
> + WRITE_DATA_DST_SEL(8) |
> + WR_CONFIRM) |
> + WRITE_DATA_CACHE_POLI

Re: [PATCH 2/3] drm/amdgpu:implement META-DATA write routines

2017-01-13 Thread Alex Deucher
On Thu, Jan 12, 2017 at 2:41 AM, Monk Liu  wrote:
> Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 31 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++
>  2 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e9983fb..2039da7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
> ring->count_dw--;
>  }
>
> +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw)
> +{
> +   unsigned occupied, chunk1, chunk2;
> +   void *dst;
> +
> +   if (ring->count_dw < count_dw)
> +   DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
> +   else {

Parens coding style as noted by Christian.  Also, this behavior
differs from amdgpu_ring_write() which completes the writes anyway.
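
For context, a rough sketch of what amdgpu_ring_write() in amdgpu.h looks like around this time (reconstructed from memory, shown only to illustrate the difference Alex points out: it logs the overflow but still performs the write, while the proposed amdgpu_ring_write_multiple() skips the write entirely):

    static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
    {
            if (ring->count_dw <= 0)
                    DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
            /* the dword is written and the pointers advance regardless */
            ring->ring[ring->wptr++] = v;
            ring->wptr &= ring->ptr_mask;
            ring->count_dw--;
    }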

> +   occupied = ring->wptr & ring->ptr_mask;
> +   dst = (void *)&ring->ring[occupied];
> +   chunk1 = ring->ptr_mask + 1 - occupied;
> +   chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
> +   chunk2 = count_dw - chunk1;
> +   chunk1 <<= 2;
> +   chunk2 <<= 2;
> +   if (chunk1) {
> +   memcpy(dst, src, chunk1);
> +   }
> +
> +   if (chunk2) {
> +   src += chunk1;
> +   dst = (void *)ring->ring;
> +   memcpy(dst, src, chunk2);
> +   }
> +
> +   ring->wptr += count_dw;
> +   ring->wptr &= ring->ptr_mask;
> +   ring->count_dw -= count_dw;
> +   }
> +}
> +

This hunk should be a separate patch.


>  static inline struct amdgpu_sdma_instance *
>  amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 375784d..3e8cff3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
>  static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
>  static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
>  static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
> +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
>
>  static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
>  {
> @@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
> .rev = 0,
> .funcs = &gfx_v8_0_ip_funcs,
>  };
> +
> +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
> +{
> +   uint64_t ce_payload_addr;
> +   int cnt_ce;
> +   static union {
> +   struct amdgpu_ce_ib_state regular;
> +   struct amdgpu_ce_ib_state_chained_ib chained;
> +   } ce_payload = {0};
> +
> +   if (ring->adev->virt.chained_ib_support) {
> +   ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
> +   cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
> +   } else {
> +   ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
> +   cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
> +   }
> +
> +   amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
> +   amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
> +   WRITE_DATA_DST_SEL(8) |
> +   WR_CONFIRM) |
> +   WRITE_DATA_CACHE_POLICY(0));
> +   amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
> +   amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
> +   amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
> +}
> +
> +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
> +{
> +   uint64_t de_payload_addr, gds_addr;
> +   int cnt_de;
> +   static union {
> +   struct amdgpu_de_ib_state regular;
> +   struct amdgpu_de_ib_state_chained_ib chained;
> +   } de_payload = {0};
> +
> +   gds_addr = csa_addr + 4096;
> +   if (ring->adev->virt.chained_ib_support) {
> +   de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
> +   de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
> +   de_payload_addr = csa_addr + 

[PATCH 2/3] drm/amdgpu:implement META-DATA write routines

2017-01-11 Thread Monk Liu
Change-Id: I66007a7f7e4e27fb129121f36143dce3cfb43738
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 31 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 61 +++
 2 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e9983fb..2039da7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1599,6 +1599,37 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
ring->count_dw--;
 }
 
+static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw)
+{
+   unsigned occupied, chunk1, chunk2;
+   void *dst;
+
+   if (ring->count_dw < count_dw)
+   DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+   else {
+   occupied = ring->wptr & ring->ptr_mask;
+   dst = (void *)&ring->ring[occupied];
+   chunk1 = ring->ptr_mask + 1 - occupied;
+   chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+   chunk2 = count_dw - chunk1;
+   chunk1 <<= 2;
+   chunk2 <<= 2;
+   if (chunk1) {
+   memcpy(dst, src, chunk1);
+   }
+
+   if (chunk2) {
+   src += chunk1;
+   dst = (void *)ring->ring;
+   memcpy(dst, src, chunk2);
+   }
+
+   ring->wptr += count_dw;
+   ring->wptr &= ring->ptr_mask;
+   ring->count_dw -= count_dw;
+   }
+}
+
 static inline struct amdgpu_sdma_instance *
 amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 375784d..3e8cff3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
+static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
+static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
 
 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -7212,3 +7214,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
.rev = 0,
.funcs = &gfx_v8_0_ip_funcs,
 };
+
+static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+{
+   uint64_t ce_payload_addr;
+   int cnt_ce;
+   static union {
+   struct amdgpu_ce_ib_state regular;
+   struct amdgpu_ce_ib_state_chained_ib chained;
+   } ce_payload = {0};
+
+   if (ring->adev->virt.chained_ib_support) {
+   ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
+   cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
+   } else {
+   ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
+   cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
+   }
+
+   amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
+   amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+   WRITE_DATA_DST_SEL(8) |
+   WR_CONFIRM) |
+   WRITE_DATA_CACHE_POLICY(0));
+   amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
+   amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
+   amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
+}
+
+static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+{
+   uint64_t de_payload_addr, gds_addr;
+   int cnt_de;
+   static union {
+   struct amdgpu_de_ib_state regular;
+   struct amdgpu_de_ib_state_chained_ib chained;
+   } de_payload = {0};
+
+   gds_addr = csa_addr + 4096;
+   if (ring->adev->virt.chained_ib_support) {
+   de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
+   de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
+   de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload);
+   cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
+   } else {
+   de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
+   de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
+   de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload);
+   cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
+   }
+
+