[PATCH] drm/amdgpu: GFX10: GRBM requires 1-cycle delay (v2)

2019-10-25 Thread Tuikov, Luben
The GRBM interface is now capable of bursting
1-cycle op per register, a WRITE followed by
another WRITE, or a WRITE followed by a READ--much
faster than previous multi-cycle per
completed-transaction interface. This causes a
problem, whereby status registers requiring a
read/write by hardware, have a 1-cycle delay, due
to the register update having to go through GRBM
interface.

This patch adds this delay.

A one cycle read op is added after updating the
invalidate request and before reading the
invalidate-ACK status.

See also commit
534991731cb5fa94b5519957646cf849ca10d17d.

v2: Remove GFX9 and apply only to SDMA ring.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6e1b25bd1fe7..dedd7e1ab2fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -346,6 +346,13 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
 
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+   /* Insert a dummy read to delay one cycle after the write REQ,
+* and before the ACK inquiry.
+*/
+   if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
+   amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_req + eng, 0, 0);
+
/* wait for the invalidate to complete */
amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
  1 << vmid, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index b8fdb192f6d6..0c41b4fdc58b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs 
sdma_v5_0_ring_funcs = {
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
/* sdma_v5_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, 
vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
-- 
2.23.0.385.gbc12974a89

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Tuikov, Luben
On 2019-10-25 12:19 p.m., Koenig, Christian wrote:
> Am 25.10.19 um 18:05 schrieb Alex Deucher:
>> On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian
>>  wrote:
>>> Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
 The GRBM interface is now capable of bursting
 1-cycle op per register, a WRITE followed by
 another WRITE, or a WRITE followed by a READ--much
 faster than previous multi-cycle per
 completed-transaction interface. This causes a
 problem, whereby status registers requiring a
 read/write by hardware, have a 1-cycle delay, due
 to the register update having to go through GRBM
 interface.

 This patch adds this delay.

 A one cycle read op is added after updating the
 invalidate request and before reading the
 invalidate-ACK status.
>>> Please completely drop all changes for GFX9 since this patch will most
>>> likely break SRIOV.
>>>
>>> Additional to that please apply the workaround only to SDMA since the CP
>>> driven engines should handle that in firmware.

Thank you Christian for reviewing this patch.

This patch stirred quite a bit of noise. So, then, I'll go by
your last comment above--I suppose this is the desired way to go forward then?

Regards,
Luben


>> I think the CP only handles this in firmware if we use the new TLB
>> invalidation packet.  I don't think it applies it to general register
>> writes like we do.
> 
> No, on the CP we should use the combined write/wait command even if we 
> don't use the new specialized VM invalidate command. Everything else 
> won't work with SRIOV.
> 
> Even if we want to we can't insert an extra read in this combined 
> write/wait command. And if we split up the commands we would break SRIOV 
> once more.
> 
> So applying this workaround to the CP code doesn't make any sense at all.
> 
> The only TODO which I can see is that we maybe don't use the combined 
> write/wait command on Navi yet.
> 
> Christian.
> 
>>
>> Alex
>>
>>> Regards,
>>> Christian.
>>>
 See also commit
 534991731cb5fa94b5519957646cf849ca10d17d.

 Signed-off-by: Luben Tuikov 
 ---
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
5 files changed, 22 insertions(+), 5 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
 b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
 index ac43b1af69e3..0042868dbd53 100644
 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
 +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
 @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
 gfx_v10_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
 @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
 gfx_v10_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, 
 vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
 diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
 b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
 index 9fe95e7693d5..9a7a717208de 100644
 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
 +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
 @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
 gfx_v9_0_ring_funcs_gfx = {
5 +  /* COND_EXEC */
7 +  /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 +  /* FENCE for VM_FLUSH */
20 + /* GDS switch */
 @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
 gfx_v9_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
  

[PATCH] drm/amdgpu: simplify padding calculations (v2)

2019-10-25 Thread Tuikov, Luben
Simplify padding calculations.

v2: Comment update and spacing.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 -
 5 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..4af9acc2dc4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
 
/* IB packet must end on a 8 DW boundary */
-   cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
8);
+   cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib)
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..b6af67f6f214 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..cd3ebed46d05 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 45bd538ba97e..8ce15056ee4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 0c41b4fdc58b..d117bde3f29a 100644
--- 

Re: [PATCH 1/2] drm/amdgpu: add independent DMA-buf export v7

2019-10-25 Thread Alex Deucher
On Fri, Oct 25, 2019 at 8:16 AM Christian König
 wrote:
>
> Add an DMA-buf export implementation independent of the DRM helpers.
>
> This not only avoids the caching of DMA-buf mappings, but also
> allows us to use the new dynamic locking approach.
>
> This is also a prerequisite of unpinned DMA-buf handling.
>
> v2: fix unintended recursion, remove debugging leftovers
> v3: split out from unpinned DMA-buf work
> v4: rebase on top of new no_sgt_cache flag
> v5: fix some warnings by including amdgpu_dma_buf.h
> v6: fix locking for non amdgpu exports
> v7: rebased on new DMA-buf locking patch
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 172 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   1 +
>  4 files changed, 97 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index 61f108ec2b5c..f14b52cc7205 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -34,26 +34,11 @@
>  #include "amdgpu.h"
>  #include "amdgpu_display.h"
>  #include "amdgpu_gem.h"
> +#include "amdgpu_dma_buf.h"
>  #include 
>  #include 
>  #include 
>
> -/**
> - * amdgpu_gem_prime_get_sg_table - _driver.gem_prime_get_sg_table
> - * implementation
> - * @obj: GEM buffer object (BO)
> - *
> - * Returns:
> - * A scatter/gather table for the pinned pages of the BO's memory.
> - */
> -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
> -{
> -   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> -   int npages = bo->tbo.num_pages;
> -
> -   return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
> -}
> -
>  /**
>   * amdgpu_gem_prime_vmap - _buf_ops.vmap implementation
>   * @obj: GEM BO
> @@ -179,92 +164,126 @@ __dma_resv_make_exclusive(struct dma_resv *obj)
>  }
>
>  /**
> - * amdgpu_dma_buf_map_attach - _buf_ops.attach implementation
> - * @dma_buf: Shared DMA buffer
> + * amdgpu_dma_buf_attach - _buf_ops.attach implementation
> + *
> + * @dmabuf: DMA-buf where we attach to
> + * @attach: attachment to add
> + *
> + * Add the attachment as user to the exported DMA-buf.
> + */
> +static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
> +struct dma_buf_attachment *attach)
> +{
> +   struct drm_gem_object *obj = dmabuf->priv;
> +   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> +   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +   int r;
> +
> +   if (attach->dev->driver == adev->dev->driver)
> +   return 0;
> +
> +   r = amdgpu_bo_reserve(bo, false);
> +   if (unlikely(r != 0))
> +   return r;
> +
> +   /*
> +* We only create shared fences for internal use, but importers
> +* of the dmabuf rely on exclusive fences for implicitly
> +* tracking write hazards. As any of the current fences may
> +* correspond to a write, we need to convert all existing
> +* fences on the reservation object into a single exclusive
> +* fence.
> +*/
> +   r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> +   if (r)
> +   return r;
> +
> +   bo->prime_shared_count++;
> +   amdgpu_bo_unreserve(bo);
> +   return 0;
> +}
> +
> +/**
> + * amdgpu_dma_buf_detach - _buf_ops.detach implementation
> + *
> + * @dmabuf: DMA-buf where we remove the attachment from
> + * @attach: the attachment to remove
> + *
> + * Called when an attachment is removed from the DMA-buf.
> + */
> +static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> + struct dma_buf_attachment *attach)
> +{
> +   struct drm_gem_object *obj = dmabuf->priv;
> +   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> +   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +
> +   if (attach->dev->driver != adev->dev->driver && 
> bo->prime_shared_count)
> +   bo->prime_shared_count--;
> +}
> +
> +/**
> + * amdgpu_dma_buf_map - _buf_ops.map_dma_buf implementation
>   * @attach: DMA-buf attachment
> + * @dir: DMA direction
>   *
>   * Makes sure that the shared DMA buffer can be accessed by the target 
> device.
>   * For now, simply pins it to the GTT domain, where it should be accessible 
> by
>   * all DMA devices.
>   *
>   * Returns:
> - * 0 on success or a negative error code on failure.
> + * sg_table filled with the DMA addresses to use or ERR_PRT with negative 
> error
> + * code.
>   */
> -static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
> -struct dma_buf_attachment *attach)
> +static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
> +  enum dma_data_direction dir)
>  {
> + 

[PATCH] drm/amdgpu: simplify padding calculations

2019-10-25 Thread Tuikov, Luben
Simplify padding calculations.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 -
 5 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..1ea9e18d6f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
 
/* IB packet must end on a 8 DW boundary */
-   cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
8);
+   cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib)
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..d340f179401a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..5c3c310188b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 45bd538ba97e..7c71c88e38a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 0c41b4fdc58b..d117bde3f29a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ 

[pull] radeon, amdgpu drm-next-5.5

2019-10-25 Thread Alex Deucher
Hi Dave, Daniel,

More new stuff for 5.5.  I tested a back merge and it was clean.

The following changes since commit 1cd4d9eead73c004d08a58536dc726bd172eaaec:

  drm/amdkfd: update for drmP.h removal (2019-10-09 12:04:48 -0500)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux tags/drm-next-5.5-2019-10-25

for you to fetch changes up to 0e04ad7d1857670944786a8465930a049aaf995f:

  drm/amdgpu/powerplay: use local renoir array sizes for clock fetching 
(2019-10-25 16:48:14 -0400)


drm-next-5.5-2019-10-25:

amdgpu:
- BACO support for CI and VI asics
- Quick memory training support for navi
- MSI-X support
- RAS fixes
- Display AVI infoframe fixes
- Display ref clock fixes for renoir
- Fix number of audio endpoints in renoir
- Fix for discovery tables
- Powerplay fixes
- Documentation fixes
- Misc cleanups

radeon:
- revert a PPC fix which broke x86


Ahzo (1):
  drm/amd/display: add NULL checks for clock manager pointer

Alex Deucher (33):
  drm/amdgpu/ras: fix typos in documentation
  drm/amdgpu/ras: document the reboot ras option
  drm/amdgpu/powerplay: fix typo in mvdd table setup
  drm/amdgpu/swSMU/navi: add feature toggles for more things
  drm/amdgpu/display: clean up dcn2*_pp_smu functions
  Revert "drm/radeon: Fix EEH during kexec"
  drm/amdgpu: move pci_save_state into suspend path
  drm/amdgpu: move gpu reset out of amdgpu_device_suspend
  drm/amdgpu: simplify ATPX detection
  drm/amdgpu: remove in_baco_reset hack
  drm/amdgpu/soc15: add support for baco reset with swSMU
  drm/amdgpu: add new BIF 4.1 register for BACO
  drm/amdgpu: add new BIF 5.0 register for BACO
  drm/amdgpu: add new SMU 7.0.1 registers for BACO
  drm/amdgpu: add new SMU 7.1.2 registers for BACO
  drm/amdgpu: add new SMU 7.1.3 registers for BACO
  drm/amdgpu/powerplay: add core support for pre-SOC15 baco
  drm/amdgpu/powerplay: add support for BACO on tonga
  drm/amdgpu/powerplay: add support for BACO on Iceland
  drm/amdgpu/powerplay: add support for BACO on polaris
  drm/amdgpu/powerplay: add support for BACO on VegaM
  drm/amdgpu/powerplay: add support for BACO on Fiji
  drm/amdgpu/powerplay: add support for BACO on CI
  drm/amdgpu/powerplay: split out common smu7 BACO code
  drm/amdgpu/powerplay: wire up BACO to powerplay API for smu7
  drm/amdgpu: enable BACO reset for SMU7 based dGPUs (v2)
  drm/amdgpu/display: fix build when CONFIG_DRM_AMD_DC_DSC_SUPPORT=n
  drm/amdgpu/uvd6: fix allocation size in enc ring test (v2)
  drm/amdgpu/uvd7: fix allocation size in enc ring test (v2)
  drm/amdgpu/vcn: fix allocation size in enc ring test
  drm/amdgpu/vce: fix allocation size in enc ring test
  drm/amdgpu/vce: make some functions static
  drm/amdgpu/powerplay: use local renoir array sizes for clock fetching

Andrey Grodzovsky (1):
  dmr/amdgpu: Fix crash on SRIOV for ERREVENT_ATHUB_INTERRUPT interrupt.

Anthony Koo (1):
  drm/amd/display: 3.2.52

Aric Cyr (4):
  drm/amd/display: Update V_UPDATE whenever VSTARTUP changes
  drm/amd/display: Properly round nominal frequency for SPD
  drm/amd/display: 3.2.53
  drm/amd/display: 3.2.54

Bhawanpreet Lakha (14):
  drm/amd/display: Add DP_DPHY_INTERNAL_CTR regs
  drm/amd/display: Add DCN_BASE regs
  drm/amd/display: Add renoir hw_seq
  drm/amd/display: create dcn21_link_encoder files
  drm/amd/display: add REFCYC_PER_TRIP_TO_MEMORY programming
  drm/amd/display: fix incorrect page table address for renoir
  drm/amd/display: add detile buffer size for renoir
  drm/amd/display: update dcn21 hubbub registers
  drm/amd/display: update renoir bounding box and res_caps
  drm/amd/display: change PP_SM defs to 8
  drm/amd/display: handle "18" case in TruncToValidBPP
  drm/amd/display: use requested_dispclk_khz instead of clk
  drm/amd/display: handle dp is usb-c
  drm/amd/display: null check pp_smu clock table before using it

Charlene Liu (1):
  drm/amd/display: use vbios message to call smu for dpm level

Christian König (2):
  drm/amdgpu: fix error handling in amdgpu_bo_list_create
  drm/amdgpu: fix potential VM faults

Dan Carpenter (1):
  drm/amdgpu/vi: silence an uninitialized variable warning

David Galiffi (1):
  drm/amd/display: Fix dongle_caps containing stale information.

Dennis Li (3):
  drm/amdgpu: change to query the actual EDC counter
  drm/amd/include: add register define for VML2 and ATCL2
  drm/amdgpu: add RAS support for VML2 and ATCL2

Dmytro Laktyushkin (6):
  drm/amd/display: fix pipe re-assignment when odm present
  drm/amd/display: add renoir specific watermark range and clk helper
  drm/amd/display: enable hostvm based on roimmu active for dcn2.1
  

Re: [PATCH] dc.c:use kzalloc without test

2019-10-25 Thread Alex Deucher
Applied.  thanks!

Alex

On Wed, Oct 23, 2019 at 9:35 AM Harry Wentland  wrote:
>
> On 2019-10-23 4:32 a.m., zhongshiqi wrote:
> > dc.c:583:null check is needed after using kzalloc function
> >
> > Signed-off-by: zhongshiqi 
>
> Reviewed-by: Harry Wentland 
>
> Harry
>
> > ---
> >  drivers/gpu/drm/amd/display/dc/core/dc.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
> > b/drivers/gpu/drm/amd/display/dc/core/dc.c
> > index 5d1aded..4b8819c 100644
> > --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> > +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> > @@ -580,6 +580,10 @@ static bool construct(struct dc *dc,
> >  #ifdef CONFIG_DRM_AMD_DC_DCN2_0
> >   // Allocate memory for the vm_helper
> >   dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL);
> > + if (!dc->vm_helper) {
> > + dm_error("%s: failed to create dc->vm_helper\n", __func__);
> > + goto fail;
> > + }
> >
> >  #endif
> >   memcpy(>bb_overrides, _params->bb_overrides, 
> > sizeof(dc->bb_overrides));
> >
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete unnecessary pr_fmt switch

2019-10-25 Thread Zhao, Yong
Given amdkfd.ko has been merged into amdgpu.ko, this switch is no
longer useful.

Change-Id: If56b80e086f4ea26f347c70b620b3892afc24ddf
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c   | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 3 ---
 7 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d3da9dde4ee1..fa5471c12c34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include "amdgpu_amdkfd.h"
 #include "amd_shared.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index e1fbbebce4fd..b6713e0ed1b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -19,10 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 0878f59ec340..61cd707158e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6e6f0a99ec06..30897b2d9175 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index bfbddedb2380..ede6ab0cbe4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index c72246f2c08a..47c853ef1051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1fbe81094b5f..97114e18c022 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,9 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
 #include 
 #include 
 #include 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/radeon: remove assignment for return value

2019-10-25 Thread Alex Deucher
Applied.  Thanks!

Alex

On Wed, Oct 23, 2019 at 11:09 AM Harry Wentland  wrote:
>
> On 2019-10-19 3:32 a.m., Wambui Karuga wrote:
> > Remove unnecessary assignment for return value and have the
> > function return the required value directly.
> > Issue found by coccinelle:
> > @@
> > local idexpression ret;
> > expression e;
> > @@
> >
> > -ret =
> > +return
> >  e;
> > -return ret;
> >
> > Signed-off-by: Wambui Karuga 
>
> Thanks for your patch.
>
> Reviewed-by: Harry Wentland 
>
> Harry
>
>
> > ---
> >  drivers/gpu/drm/radeon/cik.c | 8 ++--
> >  1 file changed, 2 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> > index 62eab82a64f9..daff9a2af3be 100644
> > --- a/drivers/gpu/drm/radeon/cik.c
> > +++ b/drivers/gpu/drm/radeon/cik.c
> > @@ -221,9 +221,7 @@ int ci_get_temp(struct radeon_device *rdev)
> >   else
> >   actual_temp = temp & 0x1ff;
> >
> > - actual_temp = actual_temp * 1000;
> > -
> > - return actual_temp;
> > + return actual_temp * 1000;
> >  }
> >
> >  /* get temperature in millidegrees */
> > @@ -239,9 +237,7 @@ int kv_get_temp(struct radeon_device *rdev)
> >   else
> >   actual_temp = 0;
> >
> > - actual_temp = actual_temp * 1000;
> > -
> > - return actual_temp;
> > + return actual_temp * 1000;
> >  }
> >
> >  /*
> >
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: simplify padding calculations

2019-10-25 Thread Tuikov, Luben
On 2019-10-25 2:54 a.m., Koenig, Christian wrote:
> Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
>> Simplify padding calculations.
>>
>> Signed-off-by: Luben Tuikov 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +-
>>   5 files changed, 17 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> index c45304f1047c..1ea9e18d6f08 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>  u32 extra_bits = vmid & 0xf;
>>   
>>  /* IB packet must end on a 8 DW boundary */
>> -cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
>> 8);
>> +cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>>   
>>  amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
>> extra_bits));
>>  amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
>> byte aligned */
>> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib)
>>  u32 pad_count;
>>  int i;
>>   
>> -pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +pad_count = (-ib->length_dw) & 7;
>>  for (i = 0; i < pad_count; i++)
>>  if (sdma && sdma->burst_nop && (i == 0))
>>  ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> index a10175838013..d340f179401a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>  unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>  /* IB packet must end on a 8 DW boundary */
>> -sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>  amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>  u32 pad_count;
>>  int i;
>>   
>> -pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +pad_count = (-ib->length_dw) & 7;
>>  for (i = 0; i < pad_count; i++)
>>  if (sdma && sdma->burst_nop && (i == 0))
>>  ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> index 5f4e2c616241..5c3c310188b6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>  unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>  /* IB packet must end on a 8 DW boundary */
>> -sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>  amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>  u32 pad_count;
>>  int i;
>>   
>> -pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +pad_count = (-ib->length_dw) & 7;
>>  for (i = 0; i < pad_count; i++)
>>  if (sdma && sdma->burst_nop && (i == 0))
>>  ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> index 45bd538ba97e..7c71c88e38a4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>  unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>  /* IB packet must end on a 8 DW boundary */
>> -sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>  amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>  u32 pad_count;
>>  int i;
>>   
>> -pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +pad_count = (-ib->length_dw) & 7;
>>  for (i = 0; i < 

Re: [PATCH v3] drm/radeon: Fix EEH during kexec

2019-10-25 Thread Kyle Mahlkuch


On 10/10/19 11:27 PM, Greg KH wrote:


On Thu, Oct 10, 2019 at 02:44:29PM -0500, KyleMahlkuch wrote:

During kexec some adapters hit an EEH since they are not properly
shut down in the radeon_pci_shutdown() function. Adding
radeon_suspend_kms() fixes this issue.
Enabled only on PPC because this patch causes issues on some other
boards.

Signed-off-by: Kyle Mahlkuch 

Real email address please, with a '@' sign.

And your "From:" line did not match up with this :(


Greg, thanks for your help, I've resubmitted my patch.


---
  drivers/gpu/drm/radeon/radeon_drv.c | 14 ++
  1 file changed, 14 insertions(+)



This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read:
 https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH v3] drm/radeon: Fix EEH during kexec

2019-10-25 Thread KyleMahlkuch
From: Kyle Mahlkuch 

During kexec some adapters hit an EEH since they are not properly
shut down in the radeon_pci_shutdown() function. Adding
radeon_suspend_kms() fixes this issue.
Enabled only on PPC because this patch causes issues on some other
boards.

Signed-off-by: Kyle Mahlkuch 
---
 drivers/gpu/drm/radeon/radeon_drv.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index 9e55076..4528f4d 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -379,11 +379,25 @@ static int radeon_pci_probe(struct pci_dev *pdev,
 static void
 radeon_pci_shutdown(struct pci_dev *pdev)
 {
+#ifdef CONFIG_PPC64
+   struct drm_device *ddev = pci_get_drvdata(pdev);
+#endif
+
/* if we are running in a VM, make sure the device
 * torn down properly on reboot/shutdown
 */
if (radeon_device_is_virtual())
radeon_pci_remove(pdev);
+
+#ifdef CONFIG_PPC64
+   /* Some adapters need to be suspended before a
+* shutdown occurs in order to prevent an error
+* during kexec.
+* Make this power specific because it breaks
+* some non-power boards.
+*/
+   radeon_suspend_kms(ddev, true, true, false);
+#endif
 }
 
 static int radeon_pmops_suspend(struct device *dev)
-- 
1.8.3.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 0/3] drm/amdgpu: fix stack alignment ABI mismatch

2019-10-25 Thread Alex Deucher
On Fri, Oct 25, 2019 at 4:12 AM Nick Desaulniers
 wrote:
>
> On Wed, Oct 16, 2019 at 4:02 PM Nick Desaulniers
>  wrote:
> >
> > The x86 kernel is compiled with an 8B stack alignment via
> > `-mpreferred-stack-boundary=3` for GCC since 3.6-rc1 via
> > commit d9b0cde91c60 ("x86-64, gcc: Use -mpreferred-stack-boundary=3 if 
> > supported")
> > or `-mstack-alignment=8` for Clang. Parts of the AMDGPU driver are
> > compiled with 16B stack alignment.
> >
> > Generally, the stack alignment is part of the ABI. Linking together two
> > different translation units with differing stack alignment is dangerous,
> > particularly when the translation unit with the smaller stack alignment
> > makes calls into the translation unit with the larger stack alignment.
> > While 8B aligned stacks are sometimes also 16B aligned, they are not
> > always.
> >
> > Multiple users have reported General Protection Faults (GPF) when using
> > the AMDGPU driver compiled with Clang. Clang is placing objects in stack
> > slots assuming the stack is 16B aligned, and selecting instructions that
> > require 16B aligned memory operands.
> >
> > At runtime, syscall handlers with 8B aligned stack call into code that
> > assumes 16B stack alignment.  When the stack is a multiple of 8B but not
> > 16B, these instructions result in a GPF.
> >
> > Remove the code that added compatibility between the differing compiler
> > flags, as it will result in runtime GPFs when built with Clang.
> >
> > The series is broken into 3 patches, the first is an important fix for
> > Clang for ChromeOS. The rest are attempted cleanups for GCC, but require
> > additional boot testing. The first patch is critical, the rest are nice
> > to have. I've compile tested the series with ToT Clang, GCC 4.9, and GCC
> > 8.3 **but** I do not have hardware to test on, so I need folks with the
> > above compilers and relevant hardware to help test the series.
> >
> > The first patch is a functional change for Clang only. It does not
> > change anything for any version of GCC. Yuxuan boot tested a previous
> > incarnation on hardware, but I've changed it enough that I think it made
> > sense to drop the previous tested by tag.
>
> Thanks for testing the first patch Shirish. Are you or Yuxuan able to
> test the rest of the series with any combination of {clang|gcc < 7.1|
> gcc >= 7.1} on hardware and report your findings?
>
> >
> > The second patch is a functional change for GCC 7.1+ only. It does not
> > affect older versions of GCC or Clang (though if someone wanted to
> > double check with pre-GCC 7.1 it wouldn't hurt).  It should be boot
> > tested on GCC 7.1+ on the relevant hardware.
> >
> > The final patch is also a functional change for GCC 7.1+ only. It does
> > not affect older versions of GCC or Clang. It should be boot tested on
> > GCC 7.1+ on the relevant hardware. Theoretically, there may be an issue
> > with it, and it's ok to drop it. The series was intentional broken into
> > 3 in order to allow them to be incrementally tested and accepted. It's
> > ok to take earlier patches without the later patches.
> >
> > And finally, I do not condone linking object files of differing stack
> > alignments.  Idealistically, we'd mark the driver broken for pre-GCC
> > 7.1.  Pragmatically, "if it ain't broke, don't fix it."
>
> Harry, Alex,
> Thoughts on the series? Has AMD been able to stress test these more 
> internally?
>

Was out of the office most of the week.  They look good to me.  I'll
pull them in today and see what happens.

Alex

> >
> > Nick Desaulniers (3):
> >   drm/amdgpu: fix stack alignment ABI mismatch for Clang
> >   drm/amdgpu: fix stack alignment ABI mismatch for GCC 7.1+
> >   drm/amdgpu: enable -msse2 for GCC 7.1+ users
> >
> >  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 19 ---
> >  drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 19 ---
> >  drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 19 ---
> >  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 19 ---
> >  drivers/gpu/drm/amd/display/dc/dsc/Makefile   | 19 ---
> >  5 files changed, 60 insertions(+), 35 deletions(-)
> >
> > --
> > 2.23.0.700.g56cf767bdb-goog
> >
>
>
> --
> Thanks,
> ~Nick Desaulniers
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/display: remove gcc warning Wunused-but-set-variable

2019-10-25 Thread Alex Deucher
On Sat, Oct 19, 2019 at 8:07 PM Chen Wandun  wrote:
>
> From: Chenwandun 
>
> drivers/gpu/drm/amd/display/dc/dce/dce_aux.c: In function 
> dce_aux_configure_timeout:
> drivers/gpu/drm/amd/display/dc/dce/dce_aux.c: warning: variable timeout set 
> but not used [-Wunused-but-set-variable]
>
> Signed-off-by: Chenwandun 

Applied.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 5 -
>  1 file changed, 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c 
> b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
> index 976bd49..739f8e2 100644
> --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
> +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
> @@ -432,7 +432,6 @@ static bool dce_aux_configure_timeout(struct ddc_service 
> *ddc,
>  {
> uint32_t multiplier = 0;
> uint32_t length = 0;
> -   uint32_t timeout = 0;
> struct ddc *ddc_pin = ddc->ddc_pin;
> struct dce_aux *aux_engine = 
> ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
> struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(aux_engine);
> @@ -446,25 +445,21 @@ static bool dce_aux_configure_timeout(struct 
> ddc_service *ddc,
> length = timeout_in_us/TIME_OUT_MULTIPLIER_8;
> if (timeout_in_us % TIME_OUT_MULTIPLIER_8 != 0)
> length++;
> -   timeout = length * TIME_OUT_MULTIPLIER_8;
> } else if (timeout_in_us <= 2 * TIME_OUT_INCREMENT) {
> multiplier = 1;
> length = timeout_in_us/TIME_OUT_MULTIPLIER_16;
> if (timeout_in_us % TIME_OUT_MULTIPLIER_16 != 0)
> length++;
> -   timeout = length * TIME_OUT_MULTIPLIER_16;
> } else if (timeout_in_us <= 4 * TIME_OUT_INCREMENT) {
> multiplier = 2;
> length = timeout_in_us/TIME_OUT_MULTIPLIER_32;
> if (timeout_in_us % TIME_OUT_MULTIPLIER_32 != 0)
> length++;
> -   timeout = length * TIME_OUT_MULTIPLIER_32;
> } else if (timeout_in_us > 4 * TIME_OUT_INCREMENT) {
> multiplier = 3;
> length = timeout_in_us/TIME_OUT_MULTIPLIER_64;
> if (timeout_in_us % TIME_OUT_MULTIPLIER_64 != 0)
> length++;
> -   timeout = length * TIME_OUT_MULTIPLIER_64;
> }
>
> length = (length < MAX_TIMEOUT_LENGTH) ? length : MAX_TIMEOUT_LENGTH;
> --
> 2.7.4
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/5] crypto: nx - Improve debugfs_create_u{32,64}() handling for atomics

2019-10-25 Thread Herbert Xu
On Mon, Oct 21, 2019 at 04:51:45PM +0200, Geert Uytterhoeven wrote:
> Variables of type atomic{,64}_t can be used fine with
> debugfs_create_u{32,64}, when passing a pointer to the embedded counter.
> This allows to get rid of the casts, which prevented compiler checks.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
>  drivers/crypto/nx/nx_debugfs.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/radeon: Handle workqueue allocation failure

2019-10-25 Thread Michel Dänzer
On 2019-10-25 6:18 p.m., Will Deacon wrote:
> On Fri, Oct 25, 2019 at 06:06:26PM +0200, Michel Dänzer wrote:
>> On 2019-10-25 1:04 p.m., Will Deacon wrote:
>>> In the highly unlikely event that we fail to allocate the "radeon-crtc"
>>> workqueue, we should bail cleanly rather than blindly march on with a
>>> NULL pointer installed for the 'flip_queue' field of the 'radeon_crtc'
>>> structure.
>>>
>>> This was reported previously by Nicolas, but I don't think his fix was
>>> correct:
>>
>> Neither is this one I'm afraid. See my feedback on
>> https://patchwork.freedesktop.org/patch/331534/ .
> 
> Thanks. Although I agree with you wrt the original patch, I don't think
> the workqueue allocation failure is distinguishable from the CRTC allocation
> failure with my patch. Are you saying that the error path there is broken
> too?

The driver won't actually work if radeon_crtc_init bails silently, it'll
just fall over at some later point.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 18:05 schrieb Alex Deucher:
> On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian
>  wrote:
>> Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
>>> The GRBM interface is now capable of bursting
>>> 1-cycle op per register, a WRITE followed by
>>> another WRITE, or a WRITE followed by a READ--much
>>> faster than previous multi-cycle per
>>> completed-transaction interface. This causes a
>>> problem, whereby status registers requiring a
>>> read/write by hardware, have a 1-cycle delay, due
>>> to the register update having to go through GRBM
>>> interface.
>>>
>>> This patch adds this delay.
>>>
>>> A one cycle read op is added after updating the
>>> invalidate request and before reading the
>>> invalidate-ACK status.
>> Please completely drop all changes for GFX9 since this patch will most
>> likely break SRIOV.
>>
>> Additional to that please apply the workaround only to SDMA since the CP
>> driven engines should handle that in firmware.
> I think the CP only handles this in firmware if we use the new TLB
> invalidation packet.  I don't think it applies it to general register
> writes like we do.

No, on the CP we should use the combined write/wait command even if we 
don't use the new specialized VM invalidate command. Everything else 
won't work with SRIOV.

Even if we want to we can't insert an extra read in this combined 
write/wait command. And if we split up the commands we would break SRIOV 
once more.

So applying this workaround to the CP code doesn't make any sense at all.

The only TODO which I can see is that we maybe don't use the combined 
write/wait command on Navi yet.

Christian.

>
> Alex
>
>> Regards,
>> Christian.
>>
>>> See also commit
>>> 534991731cb5fa94b5519957646cf849ca10d17d.
>>>
>>> Signed-off-by: Luben Tuikov 
>>> ---
>>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
>>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
>>>drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
>>>drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
>>>drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
>>>5 files changed, 22 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index ac43b1af69e3..0042868dbd53 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
>>> gfx_v10_0_ring_funcs_gfx = {
>>>5 + /* COND_EXEC */
>>>7 + /* PIPELINE_SYNC */
>>>SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>2 + /* VM_FLUSH */
>>>8 + /* FENCE for VM_FLUSH */
>>>20 + /* GDS switch */
>>> @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
>>> gfx_v10_0_ring_funcs_compute = {
>>>5 + /* hdp invalidate */
>>>7 + /* gfx_v10_0_ring_emit_pipeline_sync */
>>>SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>2 + /* gfx_v10_0_ring_emit_vm_flush */
>>>8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, 
>>> vm fence */
>>>.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 9fe95e7693d5..9a7a717208de 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
>>> gfx_v9_0_ring_funcs_gfx = {
>>>5 +  /* COND_EXEC */
>>>7 +  /* PIPELINE_SYNC */
>>>SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>2 + /* VM_FLUSH */
>>>8 +  /* FENCE for VM_FLUSH */
>>>20 + /* GDS switch */
>>> @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
>>> gfx_v9_0_ring_funcs_compute = {
>>>5 + /* hdp invalidate */
>>>7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>>>SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>2 + /* gfx_v9_0_ring_emit_vm_flush */
>>>8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
>>> fence */
>>>.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> index 6e1b25bd1fe7..100d526e9a42 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> +++ 

Re: [PATCH] drm/radeon: Handle workqueue allocation failure

2019-10-25 Thread Will Deacon
On Fri, Oct 25, 2019 at 06:06:26PM +0200, Michel Dänzer wrote:
> On 2019-10-25 1:04 p.m., Will Deacon wrote:
> > In the highly unlikely event that we fail to allocate the "radeon-crtc"
> > workqueue, we should bail cleanly rather than blindly march on with a
> > NULL pointer installed for the 'flip_queue' field of the 'radeon_crtc'
> > structure.
> > 
> > This was reported previously by Nicolas, but I don't think his fix was
> > correct:
> 
> Neither is this one I'm afraid. See my feedback on
> https://patchwork.freedesktop.org/patch/331534/ .

Thanks. Although I agree with you wrt the original patch, I don't think
the workqueue allocation failure is distinguishable from the CRTC allocation
failure with my patch. Are you saying that the error path there is broken
too?

Will
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/radeon: Handle workqueue allocation failure

2019-10-25 Thread Michel Dänzer
On 2019-10-25 1:04 p.m., Will Deacon wrote:
> In the highly unlikely event that we fail to allocate the "radeon-crtc"
> workqueue, we should bail cleanly rather than blindly march on with a
> NULL pointer installed for the 'flip_queue' field of the 'radeon_crtc'
> structure.
> 
> This was reported previously by Nicolas, but I don't think his fix was
> correct:

Neither is this one I'm afraid. See my feedback on
https://patchwork.freedesktop.org/patch/331534/ .


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Alex Deucher
On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian
 wrote:
>
> Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
> > The GRBM interface is now capable of bursting
> > 1-cycle op per register, a WRITE followed by
> > another WRITE, or a WRITE followed by a READ--much
> > faster than previous multi-cycle per
> > completed-transaction interface. This causes a
> > problem, whereby status registers requiring a
> > read/write by hardware, have a 1-cycle delay, due
> > to the register update having to go through GRBM
> > interface.
> >
> > This patch adds this delay.
> >
> > A one cycle read op is added after updating the
> > invalidate request and before reading the
> > invalidate-ACK status.
>
> Please completely drop all changes for GFX9 since this patch will most
> likely break SRIOV.
>
> Additional to that please apply the workaround only to SDMA since the CP
> driven engines should handle that in firmware.

I think the CP only handles this in firmware if we use the new TLB
invalidation packet.  I don't think it applies it to general register
writes like we do.

Alex

>
> Regards,
> Christian.
>
> >
> > See also commit
> > 534991731cb5fa94b5519957646cf849ca10d17d.
> >
> > Signed-off-by: Luben Tuikov 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
> >   5 files changed, 22 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index ac43b1af69e3..0042868dbd53 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
> > gfx_v10_0_ring_funcs_gfx = {
> >   5 + /* COND_EXEC */
> >   7 + /* PIPELINE_SYNC */
> >   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >   2 + /* VM_FLUSH */
> >   8 + /* FENCE for VM_FLUSH */
> >   20 + /* GDS switch */
> > @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
> > gfx_v10_0_ring_funcs_compute = {
> >   5 + /* hdp invalidate */
> >   7 + /* gfx_v10_0_ring_emit_pipeline_sync */
> >   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >   2 + /* gfx_v10_0_ring_emit_vm_flush */
> >   8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
> > fence */
> >   .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 9fe95e7693d5..9a7a717208de 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
> > gfx_v9_0_ring_funcs_gfx = {
> >   5 +  /* COND_EXEC */
> >   7 +  /* PIPELINE_SYNC */
> >   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >   2 + /* VM_FLUSH */
> >   8 +  /* FENCE for VM_FLUSH */
> >   20 + /* GDS switch */
> > @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
> > gfx_v9_0_ring_funcs_compute = {
> >   5 + /* hdp invalidate */
> >   7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> >   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
> >   2 + /* gfx_v9_0_ring_emit_vm_flush */
> >   8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
> > fence */
> >   .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > index 6e1b25bd1fe7..100d526e9a42 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
> > amdgpu_ring *ring,
> >
> >   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> >
> > + /* Insert a dummy read to delay one cycle before the ACK
> > +  * inquiry.
> > +  */
> > + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
> > + ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> > + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> > + amdgpu_ring_emit_reg_wait(ring,
> > +   hub->vm_inv_eng0_req + 

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Grodzovsky, Andrey

On 10/25/19 11:57 AM, Koenig, Christian wrote:
Am 25.10.19 um 17:35 schrieb Grodzovsky, Andrey:


On 10/25/19 5:26 AM, Koenig, Christian wrote:
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?


We still have it and isn't doing kthread_park()/unpark() from 
drm_sched_entity_fini while GPU reset in progress defeats all the purpose of 
drm_sched_stop->kthread_park ? If drm_sched_entity_fini-> kthread_unpark 
happens AFTER drm_sched_stop->kthread_park nothing prevents from another 
(third) thread keep submitting job to HW which will be picked up by the 
unparked scheduler thread try to submit to HW but fail because the HW ring is 
deactivated.

If so maybe we should serialize calls to kthread_park/unpark(sched->thread) ?

Yeah, that was my thinking as well. Probably best to just grab the reset lock 
before calling drm_sched_entity_fini().


Shirish - please try locking &adev->lock_reset around calls to 
drm_sched_entity_fini as Christian suggests and see if this actually helps the 
issue.

Andrey


Alternative I think we could change the kthread_park/unpark to a 
wait_event_ in drm_sched_entity_fini().

Regards,
Christian.


Andrey


Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspend while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL in "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" when called during reset, all drm_sched_stop() does 
is cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.


Re: [PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 17:56 schrieb Grodzovsky, Andrey:
> On 10/25/19 11:55 AM, Koenig, Christian wrote:
>> Am 25.10.19 um 16:57 schrieb Grodzovsky, Andrey:
>>> On 10/25/19 4:44 AM, Christian König wrote:
 Am 24.10.19 um 21:57 schrieb Andrey Grodzovsky:
> Problem:
> When run_job fails and HW fence returned is NULL we still signal
> the s_fence to avoid hangs but the user has no way of knowing if
> the actual HW job was ran and finished.
>
> Fix:
> Allow .run_job implementations to return ERR_PTR in the fence pointer
> returned and then set this error for s_fence->finished fence so whoever
> wait on this fence can inspect the signaled fence for an error.
>
> Signed-off-by: Andrey Grodzovsky 
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 19 ---
>  1 file changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 9a0ee74..f39b97e 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct
> drm_gpu_scheduler *sched)
>  struct drm_sched_job *s_job, *tmp;
>  uint64_t guilty_context;
>  bool found_guilty = false;
> +    struct dma_fence *fence;
>    list_for_each_entry_safe(s_job, tmp,
> >ring_mirror_list, node) {
>  struct drm_sched_fence *s_fence = s_job->s_fence;
> @@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct
> drm_gpu_scheduler *sched)
>  dma_fence_set_error(_fence->finished, -ECANCELED);
>    dma_fence_put(s_job->s_fence->parent);
> -    s_job->s_fence->parent = sched->ops->run_job(s_job);
> +    fence = sched->ops->run_job(s_job);
> +
> +    if (IS_ERR_OR_NULL(fence)) {
> +    s_job->s_fence->parent = NULL;
> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
> +    } else {
> +    s_job->s_fence->parent = fence;
> +    }
> +
> +
 Maybe time for a drm_sched_run_job() function which does that
 handling? And why don't we need to install the callback here?
>>> What code do you want to put in drm_sched_run_job ?
>>>
>>> We reinstall the callback later in drm_sched_start,
>>> drm_sched_resubmit_jobs is conditional on whether the guilty fence did
>>> signal by this time or not and so the split of the logic into
>>> drm_sched_start and drm_sched_resubmit_jobs.
>> Ah, yes of course. In this case the patch is Reviewed-by: Christian
>> König .
>>
>> Regards,
>> Christian.
>
> Thanks, there is also 2/2 patch for amdgpu, please take a look.

Seen that, feel free to add my rb as well.

Christian.

>
> Andrey
>
>
>>> Andrey
>>>
>>>
 Apart from that looks good to me,
 Christian.

>  }
>  }
>  EXPORT_SYMBOL(drm_sched_resubmit_jobs);
> @@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
>  fence = sched->ops->run_job(sched_job);
>  drm_sched_fence_scheduled(s_fence);
>  -    if (fence) {
> +    if (!IS_ERR_OR_NULL(fence)) {
>  s_fence->parent = dma_fence_get(fence);
>  r = dma_fence_add_callback(fence, _job->cb,
> drm_sched_process_job);
> @@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
>  DRM_ERROR("fence add callback failed (%d)\n",
>    r);
>  dma_fence_put(fence);
> -    } else
> +    } else {
> +
> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
>  drm_sched_process_job(NULL, _job->cb);
> +    }
>    wake_up(>job_scheduled);
>  }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 17:35 schrieb Grodzovsky, Andrey:


On 10/25/19 5:26 AM, Koenig, Christian wrote:
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?


We still have it and isn't doing kthread_park()/unpark() from 
drm_sched_entity_fini while GPU reset in progress defeats all the purpose of 
drm_sched_stop->kthread_park ? If drm_sched_entity_fini-> kthread_unpark 
happens AFTER drm_sched_stop->kthread_park nothing prevents from another 
(third) thread keep submitting job to HW which will be picked up by the 
unparked scheduler thread try to submit to HW but fail because the HW ring is 
deactivated.

If so maybe we should serialize calls to kthread_park/unpark(sched->thread) ?

Yeah, that was my thinking as well. Probably best to just grab the reset lock 
before calling drm_sched_entity_fini().

Alternatively I think we could change the kthread_park()/unpark() to a 
wait_event_*() in drm_sched_entity_fini().

Regards,
Christian.


Andrey


Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspend while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL in "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" when called during reset, all drm_sched_stop() does 
is cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach.

Re: [PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-25 Thread Grodzovsky, Andrey

On 10/25/19 11:55 AM, Koenig, Christian wrote:
> Am 25.10.19 um 16:57 schrieb Grodzovsky, Andrey:
>> On 10/25/19 4:44 AM, Christian König wrote:
>>> Am 24.10.19 um 21:57 schrieb Andrey Grodzovsky:
 Problem:
 When run_job fails and HW fence returned is NULL we still signal
 the s_fence to avoid hangs but the user has no way of knowing if
 the actual HW job was ran and finished.

 Fix:
 Allow .run_job implementations to return ERR_PTR in the fence pointer
 returned and then set this error for s_fence->finished fence so whoever
 wait on this fence can inspect the signaled fence for an error.

 Signed-off-by: Andrey Grodzovsky 
 ---
     drivers/gpu/drm/scheduler/sched_main.c | 19 ---
     1 file changed, 16 insertions(+), 3 deletions(-)

 diff --git a/drivers/gpu/drm/scheduler/sched_main.c
 b/drivers/gpu/drm/scheduler/sched_main.c
 index 9a0ee74..f39b97e 100644
 --- a/drivers/gpu/drm/scheduler/sched_main.c
 +++ b/drivers/gpu/drm/scheduler/sched_main.c
 @@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct
 drm_gpu_scheduler *sched)
     struct drm_sched_job *s_job, *tmp;
     uint64_t guilty_context;
     bool found_guilty = false;
 +    struct dma_fence *fence;
       list_for_each_entry_safe(s_job, tmp,
 >ring_mirror_list, node) {
     struct drm_sched_fence *s_fence = s_job->s_fence;
 @@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct
 drm_gpu_scheduler *sched)
     dma_fence_set_error(_fence->finished, -ECANCELED);
       dma_fence_put(s_job->s_fence->parent);
 -    s_job->s_fence->parent = sched->ops->run_job(s_job);
 +    fence = sched->ops->run_job(s_job);
 +
 +    if (IS_ERR_OR_NULL(fence)) {
 +    s_job->s_fence->parent = NULL;
 +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
 +    } else {
 +    s_job->s_fence->parent = fence;
 +    }
 +
 +
>>> Maybe time for a drm_sched_run_job() function which does that
>>> handling? And why don't we need to install the callback here?
>> What code do you want to put in drm_sched_run_job ?
>>
>> We reinstall the callback later in drm_sched_start,
>> drm_sched_resubmit_jobs is conditional on whether the guilty fence did
>> signal by this time or not and so the split of the logic into
>> drm_sched_start and drm_sched_resubmit_jobs.
> Ah, yes of course. In this case the patch is Reviewed-by: Christian
> König .
>
> Regards,
> Christian.


Thanks, there is also 2/2 patch for amdgpu, please take a look.

Andrey


>
>> Andrey
>>
>>
>>> Apart from that looks good to me,
>>> Christian.
>>>
     }
     }
     EXPORT_SYMBOL(drm_sched_resubmit_jobs);
 @@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
     fence = sched->ops->run_job(sched_job);
     drm_sched_fence_scheduled(s_fence);
     -    if (fence) {
 +    if (!IS_ERR_OR_NULL(fence)) {
     s_fence->parent = dma_fence_get(fence);
     r = dma_fence_add_callback(fence, _job->cb,
    drm_sched_process_job);
 @@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
     DRM_ERROR("fence add callback failed (%d)\n",
       r);
     dma_fence_put(fence);
 -    } else
 +    } else {
 +
 +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
     drm_sched_process_job(NULL, _job->cb);
 +    }
       wake_up(>job_scheduled);
     }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 16:57 schrieb Grodzovsky, Andrey:
> On 10/25/19 4:44 AM, Christian König wrote:
>> Am 24.10.19 um 21:57 schrieb Andrey Grodzovsky:
>>> Problem:
>>> When run_job fails and HW fence returned is NULL we still signal
>>> the s_fence to avoid hangs but the user has no way of knowing if
>>> the actual HW job was ran and finished.
>>>
>>> Fix:
>>> Allow .run_job implementations to return ERR_PTR in the fence pointer
>>> returned and then set this error for s_fence->finished fence so whoever
>>> wait on this fence can inspect the signaled fence for an error.
>>>
>>> Signed-off-by: Andrey Grodzovsky 
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_main.c | 19 ---
>>>    1 file changed, 16 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 9a0ee74..f39b97e 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct
>>> drm_gpu_scheduler *sched)
>>>    struct drm_sched_job *s_job, *tmp;
>>>    uint64_t guilty_context;
>>>    bool found_guilty = false;
>>> +    struct dma_fence *fence;
>>>      list_for_each_entry_safe(s_job, tmp,
>>> >ring_mirror_list, node) {
>>>    struct drm_sched_fence *s_fence = s_job->s_fence;
>>> @@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct
>>> drm_gpu_scheduler *sched)
>>>    dma_fence_set_error(_fence->finished, -ECANCELED);
>>>      dma_fence_put(s_job->s_fence->parent);
>>> -    s_job->s_fence->parent = sched->ops->run_job(s_job);
>>> +    fence = sched->ops->run_job(s_job);
>>> +
>>> +    if (IS_ERR_OR_NULL(fence)) {
>>> +    s_job->s_fence->parent = NULL;
>>> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
>>> +    } else {
>>> +    s_job->s_fence->parent = fence;
>>> +    }
>>> +
>>> +
>> Maybe time for a drm_sched_run_job() function which does that
>> handling? And why don't we need to install the callback here?
>
> What code do you want to put in drm_sched_run_job ?
>
> We reinstall the callback later in drm_sched_start,
> drm_sched_resubmit_jobs is conditional on whether the guilty fence did
> signal by this time or not and so the split of the logic into
> drm_sched_start and drm_sched_resubmit_jobs.

Ah, yes of course. In this case the patch is Reviewed-by: Christian 
König .

Regards,
Christian.

>
> Andrey
>
>
>> Apart from that looks good to me,
>> Christian.
>>
>>>    }
>>>    }
>>>    EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>>> @@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
>>>    fence = sched->ops->run_job(sched_job);
>>>    drm_sched_fence_scheduled(s_fence);
>>>    -    if (fence) {
>>> +    if (!IS_ERR_OR_NULL(fence)) {
>>>    s_fence->parent = dma_fence_get(fence);
>>>    r = dma_fence_add_callback(fence, _job->cb,
>>>   drm_sched_process_job);
>>> @@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
>>>    DRM_ERROR("fence add callback failed (%d)\n",
>>>      r);
>>>    dma_fence_put(fence);
>>> -    } else
>>> +    } else {
>>> +
>>> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
>>>    drm_sched_process_job(NULL, _job->cb);
>>> +    }
>>>      wake_up(>job_scheduled);
>>>    }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Koenig, Christian
Hi Changfeng,

that won't work, you can't add this to 
amdgpu_ring_emit_reg_write_reg_wait_helper or break all read triggered 
registers (like the semaphore ones).

Additional to that it will never work on GFX9, since the CP firmware 
there uses the integrated write/wait command and you can't add an 
additional dummy read there.

Regards,
Christian.

Am 25.10.19 um 16:22 schrieb Zhu, Changfeng:
> I try to write a patch based on the patch of Tuikov,Luben.
>
> Inspired by Luben,here is the patch:
>
>  From 1980d8f1ed44fb9a84a5ea1f6e2edd2bc25c629a Mon Sep 17 00:00:00 2001
> From: changzhu 
> Date: Thu, 10 Oct 2019 11:02:33 +0800
> Subject: [PATCH] drm/amdgpu: add dummy read by engines for some GCVM status
>   registers
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> SW may operate on an incorrect value if they write a register and
> immediately check the corresponding status register.
>
> Registers requiring HW to clear or set fields may be delayed by 1 cycle.
> For example,
>
> 1. write VM_INVALIDATE_ENG0_REQ mask = 5a
> 2. read VM_INVALIDATE_ENG0_ACKb till the ack is same as the request mask = 5a
>   a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is 
> complete
> 3. write VM_INVALIDATE_ENG0_REQ mask = 5a
> 4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
>   a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0
>   b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because the remote GRBM h/w
>  register takes one extra cycle to be cleared
>   c. In this case,SW wil see a false ACK if they exit on first read
>
> Affected registers (only GC variant)  | Recommended Dummy Read
> --+
> VM_INVALIDATE_ENG*_ACK  |  VM_INVALIDATE_ENG*_REQ
> VM_L2_STATUS|  VM_L2_STATUS
> VM_L2_PROTECTION_FAULT_STATUS   |  VM_L2_PROTECTION_FAULT_STATUS
> VM_L2_PROTECTION_FAULT_ADDR_HI/LO32   |  VM_L2_PROTECTION_FAULT_ADDR_HI/LO32
> VM_L2_IH_LOG_BUSY   |  VM_L2_IH_LOG_BUSY
> MC_VM_L2_PERFCOUNTER_HI/LO  |  MC_VM_L2_PERFCOUNTER_HI/LO
> ATC_L2_PERFCOUNTER_HI/LO|  ATC_L2_PERFCOUNTER_HI/LO
> ATC_L2_PERFCOUNTER2_HI/LO   |  ATC_L2_PERFCOUNTER2_HI/LO
>
> It also needs dummy read by engines for these gc registers.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  5 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  2 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  4 
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 18 ++
>   5 files changed, 31 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 4b3f58dbf36f..c2fbf6087ecf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -392,6 +392,11 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct 
> amdgpu_ring *ring,
>   uint32_t ref, uint32_t mask)
>   {
>   amdgpu_ring_emit_wreg(ring, reg0, ref);
> +
> + /* wait for a cycle to reset vm_inv_eng0_ack */
> + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
> + amdgpu_ring_emit_rreg(ring, reg0);
> +
>   amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ef1975a5323a..104c47734316 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -5155,6 +5155,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>   .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>   .preempt_ib = gfx_v10_0_ring_preempt_ib,
>   .emit_tmz = gfx_v10_0_ring_emit_tmz,
> + .emit_rreg = gfx_v10_0_ring_emit_rreg,
>   .emit_wreg = gfx_v10_0_ring_emit_wreg,
>   .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>   };
> @@ -5188,6 +5189,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_compute = {
>   .test_ib = gfx_v10_0_ring_test_ib,
>   .insert_nop = amdgpu_ring_insert_nop,
>   .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_rreg = gfx_v10_0_ring_emit_rreg,
>   .emit_wreg = gfx_v10_0_ring_emit_wreg,
>   .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 2f03bf533d41..d00b53de0fdc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Grodzovsky, Andrey

On 10/25/19 5:26 AM, Koenig, Christian wrote:
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?


We still have it and isn't doing kthread_park()/unpark() from 
drm_sched_entity_fini while GPU reset in progress defeats all the purpose of 
drm_sched_stop->kthread_park ? If drm_sched_entity_fini-> kthread_unpark 
happens AFTER drm_sched_stop->kthread_park nothing prevents from another 
(third) thread keep submitting job to HW which will be picked up by the 
unparked scheduler thread try to submit to HW but fail because the HW ring is 
deactivated.

If so maybe we should serialize calls to kthread_park/unpark(sched->thread) ?

Andrey


Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspend while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL in "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" when called during reset, all drm_sched_stop() does 
is cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover 
drm_sched_stop which stop all the 

Re: [PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-25 Thread Grodzovsky, Andrey

On 10/25/19 4:44 AM, Christian König wrote:
> Am 24.10.19 um 21:57 schrieb Andrey Grodzovsky:
>> Problem:
>> When run_job fails and HW fence returned is NULL we still signal
>> the s_fence to avoid hangs but the user has no way of knowing if
>> the actual HW job was ran and finished.
>>
>> Fix:
>> Allow .run_job implementations to return ERR_PTR in the fence pointer
>> returned and then set this error for s_fence->finished fence so whoever
>> wait on this fence can inspect the signaled fence for an error.
>>
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>>   drivers/gpu/drm/scheduler/sched_main.c | 19 ---
>>   1 file changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 9a0ee74..f39b97e 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct 
>> drm_gpu_scheduler *sched)
>>   struct drm_sched_job *s_job, *tmp;
>>   uint64_t guilty_context;
>>   bool found_guilty = false;
>> +    struct dma_fence *fence;
>>     list_for_each_entry_safe(s_job, tmp, 
>> >ring_mirror_list, node) {
>>   struct drm_sched_fence *s_fence = s_job->s_fence;
>> @@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct 
>> drm_gpu_scheduler *sched)
>>   dma_fence_set_error(_fence->finished, -ECANCELED);
>>     dma_fence_put(s_job->s_fence->parent);
>> -    s_job->s_fence->parent = sched->ops->run_job(s_job);
>> +    fence = sched->ops->run_job(s_job);
>> +
>> +    if (IS_ERR_OR_NULL(fence)) {
>> +    s_job->s_fence->parent = NULL;
>> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
>> +    } else {
>> +    s_job->s_fence->parent = fence;
>> +    }
>> +
>> +
>
> Maybe time for a drm_sched_run_job() function which does that 
> handling? And why don't we need to install the callback here?


What code do you want to put in drm_sched_run_job ?

We reinstall the callback later in drm_sched_start, 
drm_sched_resubmit_jobs is conditional on whether the guilty fence did 
signal by this time or not and so the split of the logic into 
drm_sched_start and drm_sched_resubmit_jobs.

Andrey


>
> Apart from that looks good to me,
> Christian.
>
>>   }
>>   }
>>   EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>> @@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
>>   fence = sched->ops->run_job(sched_job);
>>   drm_sched_fence_scheduled(s_fence);
>>   -    if (fence) {
>> +    if (!IS_ERR_OR_NULL(fence)) {
>>   s_fence->parent = dma_fence_get(fence);
>>   r = dma_fence_add_callback(fence, _job->cb,
>>  drm_sched_process_job);
>> @@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
>>   DRM_ERROR("fence add callback failed (%d)\n",
>>     r);
>>   dma_fence_put(fence);
>> -    } else
>> +    } else {
>> +
>> +    dma_fence_set_error(_fence->finished, PTR_ERR(fence));
>>   drm_sched_process_job(NULL, _job->cb);
>> +    }
>>     wake_up(>job_scheduled);
>>   }
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Zhu, Changfeng
I try to write a patch based on the patch of Tuikov,Luben.

Inspired by Luben,here is the patch:

From 1980d8f1ed44fb9a84a5ea1f6e2edd2bc25c629a Mon Sep 17 00:00:00 2001
From: changzhu 
Date: Thu, 10 Oct 2019 11:02:33 +0800
Subject: [PATCH] drm/amdgpu: add dummy read by engines for some GCVM status
 registers

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

SW may operate on an incorrect value if they write a register and
immediately check the corresponding status register.

Registers requiring HW to clear or set fields may be delayed by 1 cycle.
For example,

1. write VM_INVALIDATE_ENG0_REQ mask = 5a
2. read VM_INVALIDATE_ENG0_ACKb till the ack is same as the request mask = 5a
a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is 
complete
3. write VM_INVALIDATE_ENG0_REQ mask = 5a
4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0
b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because the remote GRBM h/w
   register takes one extra cycle to be cleared
c. In this case,SW wil see a false ACK if they exit on first read

Affected registers (only GC variant)  | Recommended Dummy Read
--+
VM_INVALIDATE_ENG*_ACK|  VM_INVALIDATE_ENG*_REQ
VM_L2_STATUS  |  VM_L2_STATUS
VM_L2_PROTECTION_FAULT_STATUS |  VM_L2_PROTECTION_FAULT_STATUS
VM_L2_PROTECTION_FAULT_ADDR_HI/LO32   |  VM_L2_PROTECTION_FAULT_ADDR_HI/LO32
VM_L2_IH_LOG_BUSY |  VM_L2_IH_LOG_BUSY
MC_VM_L2_PERFCOUNTER_HI/LO|  MC_VM_L2_PERFCOUNTER_HI/LO
ATC_L2_PERFCOUNTER_HI/LO  |  ATC_L2_PERFCOUNTER_HI/LO
ATC_L2_PERFCOUNTER2_HI/LO |  ATC_L2_PERFCOUNTER2_HI/LO

It also needs dummy read by engines for these gc registers.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  5 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  4 
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 18 ++
 5 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 4b3f58dbf36f..c2fbf6087ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -392,6 +392,11 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct 
amdgpu_ring *ring,
uint32_t ref, uint32_t mask)
 {
amdgpu_ring_emit_wreg(ring, reg0, ref);
+
+   /* wait for a cycle to reset vm_inv_eng0_ack */
+   if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+   amdgpu_ring_emit_rreg(ring, reg0);
+
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ef1975a5323a..104c47734316 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5155,6 +5155,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_gfx = {
.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_tmz = gfx_v10_0_ring_emit_tmz,
+   .emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 };
@@ -5188,6 +5189,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_compute = {
.test_ib = gfx_v10_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+   .emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2f03bf533d41..d00b53de0fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6253,6 +6253,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_tmz = gfx_v9_0_ring_emit_tmz,
+   .emit_rreg = gfx_v9_0_ring_emit_rreg,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
@@ -6289,6 +6290,7 @@ static const struct amdgpu_ring_funcs 

[PATCH AUTOSEL 4.19 12/37] drm/amdgpu/display: Fix reload driver error

2019-10-25 Thread Sasha Levin
From: Emily Deng 

[ Upstream commit 526c654a8a0641d4289bf65effde4d6095bd8384 ]

Issue:
Will have follow error when reload driver:
[ 3986.567739] sysfs: cannot create duplicate filename 
'/devices/pci:00/:00:07.0/drm_dp_aux_dev'
[ 3986.567743] CPU: 6 PID: 1767 Comm: modprobe Tainted: G   OE 
5.0.0-rc1-custom #1
[ 3986.567745] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3986.567746] Call Trace:
..
[ 3986.567808]  drm_dp_aux_register_devnode+0xdc/0x140 [drm_kms_helper]
..
[ 3986.569081] kobject_add_internal failed for drm_dp_aux_dev with -EEXIST, 
don't try to register things with the same name in the same directory.

Reproduce sequences:
1.modprobe amdgpu
2.modprobe -r amdgpu
3.modprobe amdgpu

Root cause:
When unload driver, it doesn't unregister aux.

v2: Don't use has_aux

Signed-off-by: Emily Deng 
Reviewed-by: Nicholas Kazlauskas 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3b07a316680c2..5209b76262311 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2870,6 +2870,13 @@ int amdgpu_dm_connector_atomic_get_property(struct 
drm_connector *connector,
return ret;
 }
 
+static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
+{
+   struct amdgpu_dm_connector *amdgpu_dm_connector = 
to_amdgpu_dm_connector(connector);
+
+   drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux);
+}
+
 static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 {
struct amdgpu_dm_connector *aconnector = 
to_amdgpu_dm_connector(connector);
@@ -2889,6 +2896,11 @@ static void amdgpu_dm_connector_destroy(struct 
drm_connector *connector)
 #endif
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
+   if (aconnector->i2c) {
+   i2c_del_adapter(&aconnector->i2c->base);
+   kfree(aconnector->i2c);
+   }
+
kfree(connector);
 }
 
@@ -2942,7 +2954,8 @@ static const struct drm_connector_funcs 
amdgpu_dm_connector_funcs = {
.atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
.atomic_set_property = amdgpu_dm_connector_atomic_set_property,
-   .atomic_get_property = amdgpu_dm_connector_atomic_get_property
+   .atomic_get_property = amdgpu_dm_connector_atomic_get_property,
+   .early_unregister = amdgpu_dm_connector_unregister
 };
 
 static struct drm_encoder *best_encoder(struct drm_connector *connector)
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v5] drm/amd/display: Add MST atomic routines

2019-10-25 Thread Mikita Lipski

On 25.10.2019 8:06, Kazlauskas, Nicholas wrote:
> On 2019-10-24 5:06 p.m., mikita.lip...@amd.com wrote:
>> From: Mikita Lipski 
>>
>> - Adding encoder atomic check to find vcpi slots for a connector
>> - Using DRM helper functions to calculate PBN
>> - Adding connector atomic check to release vcpi slots if connector
>> loses CRTC
>> - Calculate  PBN and VCPI slots only once during atomic
>> check and store them on crtc_state to eliminate
>> redundant calculation
>> - Call drm_dp_mst_atomic_check to verify validity of MST topology
>> during state atomic check
>>
>> v2: squashed previous 3 separate patches, removed DSC PBN calculation,
>> and added PBN and VCPI slots properties to amdgpu connector
>>
>> v3:
>> - moved vcpi_slots and pbn properties to dm_crtc_state and dc_stream_state
>> - updates stream's vcpi_slots and pbn on commit
>> - separated patch from the DSC MST series
>>
>> v4:
>> - set vcpi_slots and pbn properties to dm_connector_state
>> - copy porperties from connector state on to crtc state
>>
>> v5:
>> - keep the pbn and vcpi values only on connnector state
>> - added a void pointer to the stream state instead on two ints,
>> because dc_stream_state is OS agnostic. Pointer points to the
>> current dm_connector_state.
>>
>> Cc: Jun Lei 
>> Cc: Jerry Zuo 
>> Cc: Harry Wentland 
>> Cc: Nicholas Kazlauskas 
>> Cc: Lyude Paul 
>> Signed-off-by: Mikita Lipski 
> Few comments below, mostly about how you're storing the DRM state in the
> DC stream.

Hi Nick,

Thanks for pointing that out.

It is definitely better not to introduce a new state pointer to the stream.

I'll apply your comments for the next version.

>
>> ---
>>.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 46 ++-
>>.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +
>>.../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 44 ++
>>.../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 32 +
>>drivers/gpu/drm/amd/display/dc/dc_stream.h|  1 +
>>5 files changed, 84 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 48f5b43e2698..1d8d7aaba197 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -3747,6 +3747,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
>> *aconnector,
>>  }
>>
>>  stream->dm_stream_context = aconnector;
>> +stream->dm_stream_state = dm_state;
>>
>>  stream->timing.flags.LTE_340MCSC_SCRAMBLE =
>>  drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
>> @@ -4180,7 +4181,8 @@ void amdgpu_dm_connector_funcs_reset(struct 
>> drm_connector *connector)
>>  state->underscan_hborder = 0;
>>  state->underscan_vborder = 0;
>>  state->base.max_requested_bpc = 8;
>> -
>> +state->vcpi_slots = 0;
>> +state->pbn = 0;
>>  if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
>>  state->abm_level = amdgpu_dm_abm_level;
>>
>> @@ -4209,7 +4211,8 @@ amdgpu_dm_connector_atomic_duplicate_state(struct 
>> drm_connector *connector)
>>  new_state->underscan_enable = state->underscan_enable;
>>  new_state->underscan_hborder = state->underscan_hborder;
>>  new_state->underscan_vborder = state->underscan_vborder;
>> -
>> +new_state->vcpi_slots = state->vcpi_slots;
>> +new_state->pbn = state->pbn;
>>  return &new_state->base;
>>}
>>
>> @@ -4610,6 +4613,37 @@ static int dm_encoder_helper_atomic_check(struct 
>> drm_encoder *encoder,
>>struct drm_crtc_state *crtc_state,
>>struct drm_connector_state 
>> *conn_state)
>>{
>> +struct drm_atomic_state *state = crtc_state->state;
>> +struct drm_connector *connector = conn_state->connector;
>> +struct amdgpu_dm_connector *aconnector = 
>> to_amdgpu_dm_connector(connector);
>> +struct dm_connector_state *dm_new_connector_state = 
>> to_dm_connector_state(conn_state);
>> +const struct drm_display_mode *adjusted_mode = 
>> &crtc_state->adjusted_mode;
>> +struct drm_dp_mst_topology_mgr *mst_mgr;
>> +struct drm_dp_mst_port *mst_port;
>> +int clock, bpp = 0;
>> +
>> +if (!aconnector->port || !aconnector->dc_sink)
>> +return 0;
>> +
>> +mst_port = aconnector->port;
>> +mst_mgr = &aconnector->mst_port->mst_mgr;
>> +
>> +if (!crtc_state->connectors_changed && !crtc_state->mode_changed)
>> +return 0;
>> +
>> +if (!state->duplicated) {
>> +bpp = (uint8_t)connector->display_info.bpc * 3;
>> +clock = adjusted_mode->clock;
>> +dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp);
>> +}
>> +dm_new_connector_state->vcpi_slots = 
>> drm_dp_atomic_find_vcpi_slots(state,
>> +

RE: [PATCH] drm/amd/powerplay: Disable gfx CGPG when suspend smu

2019-10-25 Thread Huang, Ray
Why do you disable CGPG for all APU?

Thanks,
Ray

-Original Message-
From: amd-gfx  On Behalf Of chen gong
Sent: Friday, October 25, 2019 7:07 PM
To: amd-gfx@lists.freedesktop.org
Cc: Gong, Curry 
Subject: [PATCH] drm/amd/powerplay: Disable gfx CGPG when suspend smu

If gfx CGPG is not disabled when suspending the SMU, enabling gfx CGPG will fail when
resuming the SMU.

Platform: Renoir
dmesg log information:

[  151.844110 ] amdgpu: [powerplay] SMU is resuming...
[  151.844116 ] amdgpu: [powerplay] dpm has been disabled [  151.844604 ] 
amdgpu: [powerplay] Failed to send message 0x2f,response 0xfffb param 0x1 [ 
 151.844605 ] amdgpu: [powerplay] SMU is resumed successfully!

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 26923aa..4e468b0 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1351,6 +1351,8 @@ static int smu_suspend(void *handle)
if (adev->asic_type >= CHIP_NAVI10 &&
adev->gfx.rlc.funcs->stop)
adev->gfx.rlc.funcs->stop(adev);
+   if (smu->is_apu)
+   smu_set_gfx_cgpg(&adev->smu, false);
 
return 0;
 }
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: add independent DMA-buf export v7

2019-10-25 Thread Christian König
Add an DMA-buf export implementation independent of the DRM helpers.

This not only avoids the caching of DMA-buf mappings, but also
allows us to use the new dynamic locking approach.

This is also a prerequisite of unpinned DMA-buf handling.

v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work
v4: rebase on top of new no_sgt_cache flag
v5: fix some warnings by including amdgpu_dma_buf.h
v6: fix locking for non amdgpu exports
v7: rebased on new DMA-buf locking patch

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 172 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   1 +
 4 files changed, 97 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 61f108ec2b5c..f14b52cc7205 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -34,26 +34,11 @@
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
 #include 
 #include 
 #include 
 
-/**
- * amdgpu_gem_prime_get_sg_table - _driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-   int npages = bo->tbo.num_pages;
-
-   return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
 /**
  * amdgpu_gem_prime_vmap - _buf_ops.vmap implementation
  * @obj: GEM BO
@@ -179,92 +164,126 @@ __dma_resv_make_exclusive(struct dma_resv *obj)
 }
 
 /**
- * amdgpu_dma_buf_map_attach - _buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - _buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   int r;
+
+   if (attach->dev->driver == adev->dev->driver)
+   return 0;
+
+   r = amdgpu_bo_reserve(bo, false);
+   if (unlikely(r != 0))
+   return r;
+
+   /*
+* We only create shared fences for internal use, but importers
+* of the dmabuf rely on exclusive fences for implicitly
+* tracking write hazards. As any of the current fences may
+* correspond to a write, we need to convert all existing
+* fences on the reservation object into a single exclusive
+* fence.
+*/
+   r = __dma_resv_make_exclusive(bo->tbo.base.resv);
+   if (r)
+   return r;
+
+   bo->prime_shared_count++;
+   amdgpu_bo_unreserve(bo);
+   return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - _buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+   if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+   bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_map - _buf_ops.map_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @dir: DMA direction
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
  * For now, simply pins it to the GTT domain, where it should be accessible by
  * all DMA devices.
  *
  * Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
  */
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
-struct dma_buf_attachment *attach)
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+  enum dma_data_direction dir)
 {
+   struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct sg_table *sgt;
long r;
 
-   r = drm_gem_map_attach(dma_buf, attach);
-   if (r)
-

[PATCH 2/2] drm/amdgpu: add independent DMA-buf import v8

2019-10-25 Thread Christian König
Instead of relying on the DRM functions just implement our own import
functions. This prepares support for taking care of unpinned DMA-buf.

v2: enable for all exporters, not just amdgpu, fix invalidation
handling, lock reservation object while setting callback
v3: change to new dma_buf attach interface
v4: split out from unpinned DMA-buf work
v5: rebased and cleanup on new DMA-buf interface
v6: squash with invalidation callback change,
stop using _(map|unmap)_locked
v7: drop invalidations when the BO is already in system domain
v8: rebase on new DMA-buf patch and drop move notification

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 38 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  4 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 32 ++---
 4 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index f14b52cc7205..c22d11df013e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -370,31 +370,28 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
drm_gem_object *gobj,
 }
 
 /**
- * amdgpu_gem_prime_import_sg_table - _driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
  * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
  *
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
  *
  * Returns:
  * A new GEM BO of the given DRM device, representing the memory
  * described by the given DMA-buf attachment and scatter/gather table.
  */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-struct dma_buf_attachment *attach,
-struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 {
-   struct dma_resv *resv = attach->dmabuf->resv;
+   struct dma_resv *resv = dma_buf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
 
	memset(&bp, 0, sizeof(bp));
-   bp.size = attach->dmabuf->size;
+   bp.size = dma_buf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
@@ -405,11 +402,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
goto error;
 
-   bo->tbo.sg = sg;
-   bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-   if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+   if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
 
dma_resv_unlock(resv);
@@ -434,6 +429,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
 {
+   struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
 
	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -448,5 +444,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct 
drm_device *dev,
}
}
 
-   return drm_gem_prime_import(dev, dma_buf);
+   obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+   if (IS_ERR(obj))
+   return obj;
+
+   attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true);
+   if (IS_ERR(attach)) {
+   drm_gem_object_put(obj);
+   return ERR_CAST(attach);
+   }
+
+   get_dma_buf(dma_buf);
+   obj->import_attach = attach;
+   return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index ce1b3f017451..ec447a7b6b28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,10 +25,6 @@
 
 #include 
 
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-struct dma_buf_attachment *attach,
-struct sg_table *sg);
 struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
int flags);
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8805776c8c52..25adf2b847e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1445,7 +1445,6 @@ static struct drm_driver kms_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export 

Re: [PATCH v5] drm/amd/display: Add MST atomic routines

2019-10-25 Thread Kazlauskas, Nicholas
On 2019-10-24 5:06 p.m., mikita.lip...@amd.com wrote:
> From: Mikita Lipski 
> 
> - Adding encoder atomic check to find vcpi slots for a connector
> - Using DRM helper functions to calculate PBN
> - Adding connector atomic check to release vcpi slots if connector
> loses CRTC
> - Calculate  PBN and VCPI slots only once during atomic
> check and store them on crtc_state to eliminate
> redundant calculation
> - Call drm_dp_mst_atomic_check to verify validity of MST topology
> during state atomic check
> 
> v2: squashed previous 3 separate patches, removed DSC PBN calculation,
> and added PBN and VCPI slots properties to amdgpu connector
> 
> v3:
> - moved vcpi_slots and pbn properties to dm_crtc_state and dc_stream_state
> - updates stream's vcpi_slots and pbn on commit
> - separated patch from the DSC MST series
> 
> v4:
> - set vcpi_slots and pbn properties to dm_connector_state
> - copy porperties from connector state on to crtc state
> 
> v5:
> - keep the pbn and vcpi values only on connnector state
> - added a void pointer to the stream state instead on two ints,
> because dc_stream_state is OS agnostic. Pointer points to the
> current dm_connector_state.
> 
> Cc: Jun Lei 
> Cc: Jerry Zuo 
> Cc: Harry Wentland 
> Cc: Nicholas Kazlauskas 
> Cc: Lyude Paul 
> Signed-off-by: Mikita Lipski 

Few comments below, mostly about how you're storing the DRM state in the 
DC stream.


> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 46 ++-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +
>   .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 44 ++
>   .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 32 +
>   drivers/gpu/drm/amd/display/dc/dc_stream.h|  1 +
>   5 files changed, 84 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 48f5b43e2698..1d8d7aaba197 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -3747,6 +3747,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
> *aconnector,
>   }
>   
>   stream->dm_stream_context = aconnector;
> + stream->dm_stream_state = dm_state;
>   
>   stream->timing.flags.LTE_340MCSC_SCRAMBLE =
>   drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
> @@ -4180,7 +4181,8 @@ void amdgpu_dm_connector_funcs_reset(struct 
> drm_connector *connector)
>   state->underscan_hborder = 0;
>   state->underscan_vborder = 0;
>   state->base.max_requested_bpc = 8;
> -
> + state->vcpi_slots = 0;
> + state->pbn = 0;
>   if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
>   state->abm_level = amdgpu_dm_abm_level;
>   
> @@ -4209,7 +4211,8 @@ amdgpu_dm_connector_atomic_duplicate_state(struct 
> drm_connector *connector)
>   new_state->underscan_enable = state->underscan_enable;
>   new_state->underscan_hborder = state->underscan_hborder;
>   new_state->underscan_vborder = state->underscan_vborder;
> -
> + new_state->vcpi_slots = state->vcpi_slots;
> + new_state->pbn = state->pbn;
>   return &new_state->base;
>   }
>   
> @@ -4610,6 +4613,37 @@ static int dm_encoder_helper_atomic_check(struct 
> drm_encoder *encoder,
> struct drm_crtc_state *crtc_state,
> struct drm_connector_state 
> *conn_state)
>   {
> + struct drm_atomic_state *state = crtc_state->state;
> + struct drm_connector *connector = conn_state->connector;
> + struct amdgpu_dm_connector *aconnector = 
> to_amdgpu_dm_connector(connector);
> + struct dm_connector_state *dm_new_connector_state = 
> to_dm_connector_state(conn_state);
> + const struct drm_display_mode *adjusted_mode = 
> &crtc_state->adjusted_mode;
> + struct drm_dp_mst_topology_mgr *mst_mgr;
> + struct drm_dp_mst_port *mst_port;
> + int clock, bpp = 0;
> +
> + if (!aconnector->port || !aconnector->dc_sink)
> + return 0;
> +
> + mst_port = aconnector->port;
> + mst_mgr = &aconnector->mst_port->mst_mgr;
> +
> + if (!crtc_state->connectors_changed && !crtc_state->mode_changed)
> + return 0;
> +
> + if (!state->duplicated) {
> + bpp = (uint8_t)connector->display_info.bpc * 3;
> + clock = adjusted_mode->clock;
> + dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp);
> + }
> + dm_new_connector_state->vcpi_slots = 
> drm_dp_atomic_find_vcpi_slots(state,
> +mst_mgr,
> +mst_port,
> +
> dm_new_connector_state->pbn);
> + if (dm_new_connector_state->vcpi_slots < 0) {
> + DRM_DEBUG_ATOMIC("failed 

[PATCH] drm/amd/powerplay: Disable gfx CGPG when suspend smu

2019-10-25 Thread chen gong
If gfx CGPG is not disabled when suspending the SMU, enabling gfx CGPG will fail when
resuming the SMU.

Platform: Renoir
dmesg log information:

[  151.844110 ] amdgpu: [powerplay] SMU is resuming...
[  151.844116 ] amdgpu: [powerplay] dpm has been disabled
[  151.844604 ] amdgpu: [powerplay] Failed to send message 0x2f,response 
0xfffb param 0x1
[  151.844605 ] amdgpu: [powerplay] SMU is resumed successfully!

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 26923aa..4e468b0 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1351,6 +1351,8 @@ static int smu_suspend(void *handle)
if (adev->asic_type >= CHIP_NAVI10 &&
adev->gfx.rlc.funcs->stop)
adev->gfx.rlc.funcs->stop(adev);
+   if (smu->is_apu)
+   smu_set_gfx_cgpg(&adev->smu, false);
 
return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/radeon: Handle workqueue allocation failure

2019-10-25 Thread Will Deacon
In the highly unlikely event that we fail to allocate the "radeon-crtc"
workqueue, we should bail cleanly rather than blindly march on with a
NULL pointer installed for the 'flip_queue' field of the 'radeon_crtc'
structure.

This was reported previously by Nicolas, but I don't think his fix was
correct:

Cc: Alex Deucher 
Cc: "Christian König" 
Cc: "David (ChunMing) Zhou" 
Cc: David Airlie 
Cc: Daniel Vetter 
Reported-by: Nicolas Waisman 
Link: 
https://lore.kernel.org/lkml/cadj_3a8wfrs5nouxnqs5wye7rebfp+_a5cheeqayd_p7dfj...@mail.gmail.com/
Signed-off-by: Will Deacon 
---

Compile-tested only.

 drivers/gpu/drm/radeon/radeon_display.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c 
b/drivers/gpu/drm/radeon/radeon_display.c
index e81b01f8db90..3e4ef1380fca 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -672,17 +672,25 @@ static void radeon_crtc_init(struct drm_device *dev, int 
index)
 {
struct radeon_device *rdev = dev->dev_private;
struct radeon_crtc *radeon_crtc;
+   struct workqueue_struct *wq;
int i;
 
radeon_crtc = kzalloc(sizeof(struct radeon_crtc) + (RADEONFB_CONN_LIMIT 
* sizeof(struct drm_connector *)), GFP_KERNEL);
if (radeon_crtc == NULL)
return;
 
+   wq = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+   if (unlikely(!wq)) {
+   kfree(radeon_crtc);
+   return;
+   }
+
	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
 
	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
radeon_crtc->crtc_id = index;
-   radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+   radeon_crtc->flip_queue = wq;
+
rdev->mode_info.crtcs[index] = radeon_crtc;
 
if (rdev->family >= CHIP_BONAIRE) {
-- 
2.24.0.rc0.303.g954a862665-goog

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 12:08 schrieb S, Shirish:


On 10/25/2019 2:56 PM, Koenig, Christian wrote:
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.


The drm_sched_stop() as i mentioned only parks the thread

That should be sufficient for the thread to be halted and not submit more jobs 
to the hardware.


and cancels work and nothing else, not sure why you think it hasn't stopped or 
done what it is supposed to do.

Since it works 3/5 times.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?

Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Its sometimes triggered from drm_sched_entity_fini(), as i can see in prints 
but not always.

I believe application crashing while GPU resets is anticipated, depending upon 
workloads and state of gfx renderer when reset has happened.

Since reset is something that is not a usual/routine/regular event, such 
anomalies are to be expected when it happens,

so we need to have failsafe methods like this patch and may be some more based 
on system behavior upon reset.

Sorry but your patch is complete nonsense from the design point of view.

It is absolutely mandatory that the scheduler is stopped and the thread 
correctly parked for the GPU reset to work properly.

See you not only need to prevent running new jobs, but also job preparation 
(e.g. grabbing VMIDs) or otherwise you will immediately run into the next GPU 
reset after the first one finished.

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed 

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread S, Shirish

On 10/25/2019 2:56 PM, Koenig, Christian wrote:
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into a usable state.

I don't see the scheduler being resumed when the IB is scheduled; it's done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.


The drm_sched_stop() as i mentioned only parks the thread and cancels work and 
nothing else, not sure why you think it hasn't stopped or done what it is 
supposed to do.

Since it works 3/5 times.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?

Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Its sometimes triggered from drm_sched_entity_fini(), as i can see in prints 
but not always.

I believe application crashing while GPU resets is anticipated, depending upon 
workloads and state of gfx renderer when reset has happened.

Since reset is something that is not a usual/routine/regular event, such 
anomalies are to be expected when it happens,

so we need to have failsafe methods like this patch and may be some more based 
on system behavior upon reset.

Regards,

Shirish S

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL when "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" is called during reset, all drm_sched_stop() does

is  cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Huang, Ray
On Thu, Oct 24, 2019 at 09:16:55PM +0000, Tuikov, Luben wrote:
> The GRBM interface is now capable of bursting 1-cycle op per register, 
> a WRITE followed by another WRITE, or a WRITE followed by a READ--much 
> faster than previous multi-cycle per completed-transaction interface. 
> This causes a problem, whereby status registers requiring a read/write 
> by hardware, have a 1-cycle delay, due to the register update having 
> to go through GRBM interface.
> 
> This patch adds this delay.
> 
> A one cycle read op is added after updating the invalidate request and 
> before reading the invalidate-ACK status.
> 
> See also commit
> 534991731cb5fa94b5519957646cf849ca10d17d.
> 
> Signed-off-by: Luben Tuikov 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--  
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--  
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +  
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8   
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
>  5 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ac43b1af69e3..0042868dbd53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>   5 + /* COND_EXEC */
>   7 + /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 + /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v10_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v10_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 9fe95e7693d5..9a7a717208de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_gfx = {
>   5 +  /* COND_EXEC */
>   7 +  /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 +  /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v9_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 6e1b25bd1fe7..100d526e9a42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -346,6 +346,15 @@ static uint64_t 
> gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  
>   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>  
> + /* Insert a dummy read to delay one cycle before the ACK
> +  * inquiry.
> +  */
> + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
> + ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> + amdgpu_ring_emit_reg_wait(ring,
> +   hub->vm_inv_eng0_req + eng, 0, 0);
> +
>   /* wait for the invalidate to complete */
>   amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> 1 << vmid, 1 << vmid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9f2a893871ec..8f3097e45299 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct 
> amdgpu_ring *ring,
>   amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
> upper_32_bits(pd_addr));
>  

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 11:22 schrieb S, Shirish:


On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Then the scheduler isn't stopped for some reason and we need to investigate why.

We used to have another kthread_park()/unpark() in drm_sched_entity_fini(), 
maybe an application is crashing while we are trying to reset the GPU?

Would be rather unlikely, especially that would be rather hard to reproduce but 
currently my best bet what's going wrong here.

Regards,
Christian.


Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL when "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" is called during reset, all drm_sched_stop() does

is  cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover 
drm_sched_stop, which stops all the scheduler threads, is called way before we 
suspend the HW in amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend where 
SDMA suspension is happening and where the HW ring marked as not ready - please 
provide call stack for when you hit [drm:amdgpu_job_run] *ERROR* Error 
scheduling IBs (-22) to identify the code path which tried to submit the SDMA IB

Well the most likely cause of this is that the hardware failed to resume after 
the reset.

In fact, hardware resume has not yet started when the job is scheduled, which is
the race I am trying to address with this patch.

Regards,


Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread S, Shirish

On 10/25/2019 2:23 PM, Koenig, Christian wrote:

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

I dont see the scheduler being resumed when the ib is scheduled, its done way 
after the hardware is ready in reset code path.

Below are the logs:

amdgpu :03:00.0: GPU reset begin!
amdgpu_device_gpu_recover calling drm_sched_stop <==
...
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...
amdgpu_device_ip_resume_phase2 resumed sdma_v4_0
amdgpu_device_ip_resume_phase2 resumed powerplay
amdgpu_device_ip_resume_phase2 resumed dm
...
[drm] recover vram bo from shadow done
amdgpu_device_gpu_recover calling  drm_sched_start <==
...

As mentioned in the call trace, drm_sched_main() is responsible for this 
job_run which seems to be called during cleanup.

Regards,

Shirish S

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL when "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" is called during reset, all drm_sched_stop() does

is  cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover 
drm_sched_stop which stop all the scheduler threads is called way before we 
suspend the HW in amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend where 
SDMA suspension is happening and where the HW ring marked as not ready - please 
provide call stack for when you hit [drm:amdgpu_job_run] *ERROR* Error 
scheduling IBs (-22) to identify the code path which tried to submit the SDMA IB

Well the most likely cause of this is that the hardware failed to resume after 
the reset.

In fact, hardware resume has not yet started when the job is scheduled, which is
the race I am trying to address with this patch.

Regards,

Shirish S

Christian.


Andrey



[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
  3 files changed, 6 insertions(+)

diff --git 

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread Koenig, Christian
amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

This is what's the root of the problem.

The scheduler should never be resumed before we are done with bringing back the 
hardware into an usable state.

Regards,
Christian.

Am 25.10.19 um 10:50 schrieb S, Shirish:

Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL when "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" is called during reset, all drm_sched_stop() does

is  cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover 
drm_sched_stop which stop all the scheduler threads is called way before we 
suspend the HW in amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend where 
SDMA suspension is happening and where the HW ring marked as not ready - please 
provide call stack for when you hit [drm:amdgpu_job_run] *ERROR* Error 
scheduling IBs (-22) to identify the code path which tried to submit the SDMA IB

Well the most likely cause of this is that the hardware failed to resume after 
the reset.

In fact, hardware resume has not yet started when the job is scheduled, which is
the race I am trying to address with this patch.

Regards,

Shirish S

Christian.


Andrey



[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
  3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
  bool in_gpu_reset;
  enum pp_mp1_state   mp1_state;
  struct mutex  lock_reset;
+struct mutex  lock_ib_sched;
  struct amdgpu_doorbell_index doorbell_index;
int asic_reset_res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  mutex_init(&adev->virt.vf_errors.lock);
 

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-25 Thread S, Shirish
Here is the call trace:

Call Trace:
 dump_stack+0x4d/0x63
 amdgpu_ib_schedule+0x86/0x4b7
 ? __mod_timer+0x21e/0x244
 amdgpu_job_run+0x108/0x178
 drm_sched_main+0x253/0x2fa
 ? remove_wait_queue+0x51/0x51
 ? drm_sched_cleanup_jobs.part.12+0xda/0xda
 kthread+0x14f/0x157
 ? kthread_park+0x86/0x86
 ret_from_fork+0x22/0x40
amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)

printed via below change:

@@ -151,6 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}

if (!ring->sched.ready) {
+  dump_stack();
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
ring->name);
return -EINVAL;

On 10/24/2019 10:00 PM, Christian König wrote:
Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:


On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Below is the series of events when the issue occurs.

(Note that as you & Andrey mentioned the scheduler has been suspended but the 
job is scheduled nonetheless.)

amdgpu :03:00.0: GPU reset begin!

...

amdgpu_device_gpu_recover stopping ring sdma0 via drm_sched_stop

...

amdgpu :03:00.0: GPU reset succeeded, trying to resume

amdgpu_do_asic_reset starting to resume blocks

...

amdgpu :03:00.0: couldn't schedule ib on ring 
[drm:amdgpu_job_run] *ERROR* Error scheduling IBs (-22)
...

amdgpu_device_ip_resume_phase2 resumed gfx_v9_0

amdgpu_device_ip_resume_phase2 resumed sdma_v4_0

amdgpu_device_ip_resume_phase2 resumed powerplay

...

FWIW, since the job is always NULL when "drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);" is called during reset, all drm_sched_stop() does

is  cancel delayed work and park the sched->thread. There is no job list to be 
iterated to de-activate or remove or update fences.

Based on all this analysis, adding a mutex is more failsafe and less intrusive 
in the current code flow and lastly seems to be logical as well, hence I 
devised this approach


Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover 
drm_sched_stop which stop all the scheduler threads is called way before we 
suspend the HW in amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend where 
SDMA suspension is happening and where the HW ring marked as not ready - please 
provide call stack for when you hit [drm:amdgpu_job_run] *ERROR* Error 
scheduling IBs (-22) to identify the code path which tried to submit the SDMA IB

Well the most likely cause of this is that the hardware failed to resume after 
the reset.

In fact, hardware resume has not yet started when the job is scheduled, which is
the race I am trying to address with this patch.

Regards,

Shirish S

Christian.


Andrey



[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
  3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
  bool in_gpu_reset;
  enum pp_mp1_state   mp1_state;
  struct mutex  lock_reset;
+struct mutex  lock_ib_sched;
  struct amdgpu_doorbell_index doorbell_index;
int asic_reset_res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  mutex_init(&adev->virt.vf_errors.lock);
  hash_init(adev->mn_hash);
  mutex_init(&adev->lock_reset);
+mutex_init(&adev->lock_ib_sched);
  mutex_init(&adev->virt.dpm_mutex);
  mutex_init(&adev->psp.mutex);
  @@ -3795,7 +3796,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
  if (r)
  return r;
  +mutex_lock(&tmp_adev->lock_ib_sched);
  r = amdgpu_device_ip_resume_phase2(tmp_adev);
+mutex_unlock(&tmp_adev->lock_ib_sched);
  if (r)
  goto out;
  

Re: [PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-25 Thread Christian König

Am 24.10.19 um 21:57 schrieb Andrey Grodzovsky:

Problem:
When run_job fails and HW fence returned is NULL we still signal
the s_fence to avoid hangs but the user has no way of knowing if
the actual HW job was ran and finished.

Fix:
Allow .run_job implementations to return ERR_PTR in the fence pointer
returned and then set this error for s_fence->finished fence so whoever
wait on this fence can inspect the signaled fence for an error.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/scheduler/sched_main.c | 19 ---
  1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 9a0ee74..f39b97e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
struct drm_sched_job *s_job, *tmp;
uint64_t guilty_context;
bool found_guilty = false;
+   struct dma_fence *fence;
  
	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {

struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
		dma_fence_set_error(&s_fence->finished, -ECANCELED);
  
  		dma_fence_put(s_job->s_fence->parent);

-   s_job->s_fence->parent = sched->ops->run_job(s_job);
+   fence = sched->ops->run_job(s_job);
+
+   if (IS_ERR_OR_NULL(fence)) {
+   s_job->s_fence->parent = NULL;
+   dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
+   } else {
+   s_job->s_fence->parent = fence;
+   }
+
+


Maybe time for a drm_sched_run_job() function which does that handling? 
And why don't we need to install the callback here?


Apart from that looks good to me,
Christian.


}
  }
  EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
fence = sched->ops->run_job(sched_job);
drm_sched_fence_scheduled(s_fence);
  
-		if (fence) {

+   if (!IS_ERR_OR_NULL(fence)) {
s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_job->cb,
   drm_sched_process_job);
@@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
DRM_ERROR("fence add callback failed (%d)\n",
  r);
dma_fence_put(fence);
-   } else
+   } else {
+
+   dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
			drm_sched_process_job(NULL, &s_job->cb);
+   }
  
  		wake_up(&sched->job_scheduled);

}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Cleanup: replace prefered with preferred

2019-10-25 Thread Greg Kroah-Hartman
On Thu, Oct 24, 2019 at 03:26:28PM +0300, Jani Nikula wrote:
> On Wed, 23 Oct 2019, Mark Salyzyn  wrote:
> > I will split this between pure and inert documentation/comments for now, 
> > with a followup later for the code portion which understandably is more 
> > controversial.
> 
> Please split by driver/subsystem too, and it'll be all around much
> easier for everyone.

I agree, spelling fixes are trivial and should go in per-subsystem.

thanks,

greg k-h
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Cleanup: replace prefered with preferred

2019-10-25 Thread Greg Kroah-Hartman
On Wed, Oct 23, 2019 at 08:40:59AM -0700, Mark Salyzyn wrote:
> On 10/23/19 4:56 AM, Jarkko Sakkinen wrote:
> > On Tue, Oct 22, 2019 at 02:41:45PM -0700, Mark Salyzyn wrote:
> > > Replace all occurrences of prefered with preferred to make future
> > > checkpatch.pl's happy.  A few places the incorrect spelling is
> > > matched with the correct spelling to preserve existing user space API.
> > > 
> > > Signed-off-by: Mark Salyzyn 
> > I'd fix such things when the code is otherwise changed and scope this
> > patch only to Documentation/. There is no pragmatic benefit of doing
> > this for the code.
> > 
> > /Jarkko
> 
> The pragmatic benefit comes with the use of an ABI/API checker (which is a
> 'distro' thing, not a top of tree kernel thing) produces its map which is
> typically required to be co-located in the same tree as the kernel
> repository. Quite a few ABI/API update checkins result in a checkpatch.pl
> complaint about the misspelled elements being (re-)recorded due to
> proximity. We have a separate task to improve how it is tracked in Android
> to reduce milepost marker changes that result in sweeping changes to the
> database which would reduce the occurrences.

Requiring checkpatch spelling warnings to be correct based on function
names is crazy, you should fix your tools if you are requiring something
as looney as that :)

> I will split this between pure and inert documentation/comments for now,
> with a followup later for the code portion which understandably is more
> controversial.

Please break up per subsystem, like all trivial patches, as this
isn't anything special.

thanks,

greg k-h
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Cleanup: replace prefered with preferred

2019-10-25 Thread Jarkko Sakkinen
On Wed, Oct 23, 2019 at 08:40:59AM -0700, Mark Salyzyn wrote:
> On 10/23/19 4:56 AM, Jarkko Sakkinen wrote:
> > On Tue, Oct 22, 2019 at 02:41:45PM -0700, Mark Salyzyn wrote:
> > > Replace all occurrences of prefered with preferred to make future
> > > checkpatch.pl's happy.  A few places the incorrect spelling is
> > > matched with the correct spelling to preserve existing user space API.
> > > 
> > > Signed-off-by: Mark Salyzyn 
> > I'd fix such things when the code is otherwise changed and scope this
> > patch only to Documentation/. There is no pragmatic benefit of doing
> > this for the code.
> > 
> > /Jarkko
> 
> The pragmatic benefit comes with the use of an ABI/API checker (which is a
> 'distro' thing, not a top of tree kernel thing) produces its map which is
> typically required to be co-located in the same tree as the kernel
> repository. Quite a few ABI/API update checkins result in a checkpatch.pl
> complaint about the misspelled elements being (re-)recorded due to
> proximity. We have a separate task to improve how it is tracked in Android
> to reduce milepost marker changes that result in sweeping changes to the
> database which would reduce the occurrences.
> 
> I will split this between pure and inert documentation/comments for now,
> with a followup later for the code portion which understandably is more
> controversial.
> 
> Cleanup is the least appreciated part of kernel maintenance ;-}.
> 
> Sincerely -- Mark Salyzyn

I'm a strong believer of "evolutionary" approach. Patch sets for the
most part (everything in the end has to be considered case by case, not
a strict rule) should have some functional changes involved.

What I do require for the parts that I maintain is that any new change
will result in a cleaner code base than the one that existed before that
change was applied. Again, there are some exceptions to this e.g.
circulating a firmware bug but this is my driving guideline as a
maintainer.

Doing cleanups just for cleanups can sometimes add unnecessary merge
conflicts when backporting patches to stable kernels. Thus, if you are
doing just a cleanup you should have extremely good reasons to do so.

/Jarkko
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/amdgpu: make undeclared variables static

2019-10-25 Thread Wambui Karuga
On Wed, Oct 23, 2019 at 03:09:34PM +, Harry Wentland wrote:
> On 2019-10-19 3:24 a.m., Wambui Karuga wrote:
> > Make the `amdgpu_lockup_timeout` and `amdgpu_exp_hw_support` variables
> > static to remove the following sparse warnings:
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:103:19: warning: symbol 
> > 'amdgpu_lockup_timeout' was not declared. Should it be static?
> 
> This should be declared in amdgpu.h. amdgpu is maintained on the
> amd-staging-drm-next branch from
> https://cgit.freedesktop.org/~agd5f/linux/?h=amd-staging-drm-next. Can
> you check there?
> 
Hey Harry,
I checked the amd-staging-drm-next branch, and 'amdgpu_lockup_timeout'
is already declared as extern in amdgpu.h, so sparse only warns about
'amdgpu_exp_hw_support'. 
I'll do the same for 'amdgpu_exp_hw_support' and send an update patch
series for this and the "_LENTH" misspelling.

Thanks,
wambui karuga
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:117:18: warning: symbol 
> > 'amdgpu_exp_hw_support' was not declared. Should it be static?
> > 
> > Signed-off-by: Wambui Karuga 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 3fae1007143e..c5b3c0c9193b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -100,7 +100,7 @@ int amdgpu_disp_priority = 0;
> >  int amdgpu_hw_i2c = 0;
> >  int amdgpu_pcie_gen2 = -1;
> >  int amdgpu_msi = -1;
> > -char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
> > +static char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
> >  int amdgpu_dpm = -1;
> >  int amdgpu_fw_load_type = -1;
> >  int amdgpu_aspm = -1;
> > @@ -114,7 +114,7 @@ int amdgpu_vm_block_size = -1;
> >  int amdgpu_vm_fault_stop = 0;
> >  int amdgpu_vm_debug = 0;
> >  int amdgpu_vm_update_mode = -1;
> > -int amdgpu_exp_hw_support = 0;
> > +static int amdgpu_exp_hw_support;
> 
> This is indeed only used in this file but for consistency's sake it's
> probably better to also declare it in amdgpu.h rather than make it
> static here.
> 
> Harry
> 
> >  int amdgpu_dc = -1;
> >  int amdgpu_sched_jobs = 32;
> >  int amdgpu_sched_hw_submission = 2;
> > 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 0/3] drm/amdgpu: fix stack alignment ABI mismatch

2019-10-25 Thread Nick Desaulniers
On Wed, Oct 16, 2019 at 4:02 PM Nick Desaulniers
 wrote:
>
> The x86 kernel is compiled with an 8B stack alignment via
> `-mpreferred-stack-boundary=3` for GCC since 3.6-rc1 via
> commit d9b0cde91c60 ("x86-64, gcc: Use -mpreferred-stack-boundary=3 if 
> supported")
> or `-mstack-alignment=8` for Clang. Parts of the AMDGPU driver are
> compiled with 16B stack alignment.
>
> Generally, the stack alignment is part of the ABI. Linking together two
> different translation units with differing stack alignment is dangerous,
> particularly when the translation unit with the smaller stack alignment
> makes calls into the translation unit with the larger stack alignment.
> While 8B aligned stacks are sometimes also 16B aligned, they are not
> always.
>
> Multiple users have reported General Protection Faults (GPF) when using
> the AMDGPU driver compiled with Clang. Clang is placing objects in stack
> slots assuming the stack is 16B aligned, and selecting instructions that
> require 16B aligned memory operands.
>
> At runtime, syscall handlers with 8B aligned stack call into code that
> assumes 16B stack alignment.  When the stack is a multiple of 8B but not
> 16B, these instructions result in a GPF.
>
> Remove the code that added compatibility between the differing compiler
> flags, as it will result in runtime GPFs when built with Clang.
>
> The series is broken into 3 patches, the first is an important fix for
> Clang for ChromeOS. The rest are attempted cleanups for GCC, but require
> additional boot testing. The first patch is critical, the rest are nice
> to have. I've compile tested the series with ToT Clang, GCC 4.9, and GCC
> 8.3 **but** I do not have hardware to test on, so I need folks with the
> above compilers and relevant hardware to help test the series.
>
> The first patch is a functional change for Clang only. It does not
> change anything for any version of GCC. Yuxuan boot tested a previous
> incarnation on hardware, but I've changed it enough that I think it made
> sense to drop the previous tested by tag.

Thanks for testing the first patch Shirish. Are you or Yuxuan able to
test the rest of the series with any combination of {clang|gcc < 7.1|
gcc >= 7.1} on hardware and report your findings?

>
> The second patch is a functional change for GCC 7.1+ only. It does not
> affect older versions of GCC or Clang (though if someone wanted to
> double check with pre-GCC 7.1 it wouldn't hurt).  It should be boot
> tested on GCC 7.1+ on the relevant hardware.
>
> The final patch is also a functional change for GCC 7.1+ only. It does
> not affect older versions of GCC or Clang. It should be boot tested on
> GCC 7.1+ on the relevant hardware. Theoretically, there may be an issue
> with it, and it's ok to drop it. The series was intentional broken into
> 3 in order to allow them to be incrementally tested and accepted. It's
> ok to take earlier patches without the later patches.
>
> And finally, I do not condone linking object files of differing stack
> alignments.  Idealistically, we'd mark the driver broken for pre-GCC
> 7.1.  Pragmatically, "if it ain't broke, don't fix it."

Harry, Alex,
Thoughts on the series? Has AMD been able to stress test these more internally?

>
> Nick Desaulniers (3):
>   drm/amdgpu: fix stack alignment ABI mismatch for Clang
>   drm/amdgpu: fix stack alignment ABI mismatch for GCC 7.1+
>   drm/amdgpu: enable -msse2 for GCC 7.1+ users
>
>  drivers/gpu/drm/amd/display/dc/calcs/Makefile | 19 ---
>  drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 19 ---
>  drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 19 ---
>  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 19 ---
>  drivers/gpu/drm/amd/display/dc/dsc/Makefile   | 19 ---
>  5 files changed, 60 insertions(+), 35 deletions(-)
>
> --
> 2.23.0.700.g56cf767bdb-goog
>


-- 
Thanks,
~Nick Desaulniers
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/ttm: use the parent resv for ghost objects v3

2019-10-25 Thread Huang, Ray
Reviewed-by: Huang Rui 

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Thursday, October 24, 2019 7:17 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Zhou, 
David(ChunMing) 
Subject: [PATCH] drm/ttm: use the parent resv for ghost objects v3

This way the TTM is destroyed with the correct dma_resv object locked and we 
can even pipeline imported BO evictions.

v2: Limit this to only cases when the parent object uses a separate
reservation object as well. This fixes another OOM problem.
v3: fix init and try_lock on the wrong object

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 73a1b0186029..f7b57ca1a95b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -516,9 +516,11 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
kref_init(>base.kref);
fbo->base.destroy = _transfered_destroy;
fbo->base.acc_size = 0;
-   fbo->base.base.resv = &fbo->base.base._resv;
-   dma_resv_init(fbo->base.base.resv);
-   ret = dma_resv_trylock(fbo->base.base.resv);
+   if (bo->base.resv == &bo->base._resv)
+   fbo->base.base.resv = &fbo->base.base._resv;
+
+   dma_resv_init(&fbo->base.base._resv);
+   ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
 
*new_obj = >base;
@@ -715,7 +717,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
 
/**
 * If we're not moving to fixed memory, the TTM object @@ 
-728,7 +730,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
}
 
@@ -771,7 +773,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
 
/**
 * If we're not moving to fixed memory, the TTM object @@ 
-784,7 +786,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
 
} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) { @@ -840,7 +842,7 @@ 
int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
if (ret)
return ret;
 
-   ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);
+   ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
/* Last resort, wait for the BO to be idle when we are OOM */
if (ret)
ttm_bo_wait(bo, false, false);
@@ -849,7 +851,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
bo->mem.mem_type = TTM_PL_SYSTEM;
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost);
+   dma_resv_unlock(&ghost->base._resv);
ttm_bo_put(ghost);
 
return 0;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: simplify padding calculations

2019-10-25 Thread Koenig, Christian
Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
> Simplify padding calculations.
>
> Signed-off-by: Luben Tuikov 
> ---
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +-
>   5 files changed, 17 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c45304f1047c..1ea9e18d6f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   u32 extra_bits = vmid & 0xf;
>   
>   /* IB packet must end on a 8 DW boundary */
> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
> 8);
> + cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
> extra_bits));
>   amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
> byte aligned */
> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib)
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index a10175838013..d340f179401a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 5f4e2c616241..5c3c310188b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 45bd538ba97e..7c71c88e38a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-25 Thread Koenig, Christian
Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
> The GRBM interface is now capable of bursting
> 1-cycle op per register, a WRITE followed by
> another WRITE, or a WRITE followed by a READ--much
> faster than previous multi-cycle per
> completed-transaction interface. This causes a
> problem, whereby status registers requiring a
> read/write by hardware, have a 1-cycle delay, due
> to the register update having to go through GRBM
> interface.
>
> This patch adds this delay.
>
> A one cycle read op is added after updating the
> invalidate request and before reading the
> invalidate-ACK status.

Please completely drop all changes for GFX9 since this patch will most 
likely break SRIOV.

Additional to that please apply the workaround only to SDMA since the CP 
driven engines should handle that in firmware.

Regards,
Christian.

>
> See also commit
> 534991731cb5fa94b5519957646cf849ca10d17d.
>
> Signed-off-by: Luben Tuikov 
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
>   5 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ac43b1af69e3..0042868dbd53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>   5 + /* COND_EXEC */
>   7 + /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 + /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v10_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v10_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 9fe95e7693d5..9a7a717208de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_gfx = {
>   5 +  /* COND_EXEC */
>   7 +  /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 +  /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v9_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 6e1b25bd1fe7..100d526e9a42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
> amdgpu_ring *ring,
>   
>   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>   
> + /* Insert a dummy read to delay one cycle before the ACK
> +  * inquiry.
> +  */
> + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
> + ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> + amdgpu_ring_emit_reg_wait(ring,
> +   hub->vm_inv_eng0_req + eng, 0, 0);
> +
>   /* wait for the invalidate to complete */
>   amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> 1 << vmid, 1 << vmid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9f2a893871ec..8f3097e45299 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@