Re: [PATCH hmm 00/15] Consolidate the mmu notifier interval_tree and locking

2019-10-24 Thread Jason Gunthorpe
On Wed, Oct 23, 2019 at 12:52:23PM -0400, Jerome Glisse wrote:
> > Going another step further what hinders us to put the lock into the mmu
> > range notifier itself and have _lock()/_unlock() helpers?
> > 
> > I mean having the lock in the driver only makes sense when the driver would
> > be using the same lock for multiple things, e.g. multiple MMU range
> > notifiers under the same lock. But I really don't see that use case here.
> 
> I actually do, nouveau uses one lock to protect the page table and that's the
> lock that matters. You can have multiple ranges for a single page table, the idea
> being that only a sub-set of the process address space is ever accessed by the
> GPU and thus it is better to focus on this sub-set and track invalidation at
> a finer grain.

mlx5 is similar, but not currently coded quite right, there is one
lock that protects the command queue for submitting invalidations to
the HW and it doesn't make a lot of sense to have additional fine
grained locking beyond that.

So I suppose the intent here is that most drivers would have a single
'page table lock' that protects the HW's page table update, and this
lock is the one that should be held while updating and checking the
sequence number.

dma_fence based drivers are possibly a little different, I think they
can just use a spinlock, their pattern should probably be something
like

fault:
 hmm_range_fault()

 spin_lock()
 if (mmu_range_read_retry())
 goto again
 dma_fence_init(mrn->fence)
 spin_unlock()

invalidate:
 spin_lock()
 is_inited = 'dma fence init has been called'
 spin_unlock()
 if (is_inited)
dma_fence_wait(fence)


I'm not sure, never used dma_fence before. The key thing is that the
dma_fence_wait() cannot block until after the mmu_range_read_retry() &
unlock completes. Otherwise it can deadlock with hmm_range_fault().

It would be nice to figure this out and add it to the hmm.rst as we do
have two drivers using the dma_fence scheme.

Also, the use of a spinlock here probably says we should keep the lock
external.

But, it sounds like the mmu_range_notifier_update_seq() is a good
idea, so let me add that in v2.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: remove unused variable in amdgpu_gfx_kiq_free_ring

2019-10-24 Thread Nirmoy Das
Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 2 +-
 5 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 069515f57c2a..c9d1fada6188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -319,8 +319,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
 }
 
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
amdgpu_ring_fini(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 35eff9e6ce16..459aa9059542 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -330,8 +330,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 struct amdgpu_ring *ring,
 struct amdgpu_irq_src *irq);
 
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
 
 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8fca6ab5fa8f..ac43b1af69e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1443,7 +1443,7 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
amdgpu_gfx_mqd_sw_fini(adev);
-   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
 
gfx_v10_0_pfp_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a7fe0ea24d1f..e4c645da4e28 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2103,7 +2103,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
amdgpu_gfx_mqd_sw_fini(adev);
-   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
 
gfx_v8_0_mec_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd345fcedb97..9fe95e7693d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2153,7 +2153,7 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
amdgpu_gfx_mqd_sw_fini(adev);
-   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+   amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
 
gfx_v9_0_mec_fini(adev);
-- 
2.23.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] dc.c:use kzalloc without test

2019-10-24 Thread zhongshiqi
dc.c:583: a NULL check is needed after calling kzalloc()

Signed-off-by: zhongshiqi 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5d1aded..4b8819c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -580,6 +580,10 @@ static bool construct(struct dc *dc,
 #ifdef CONFIG_DRM_AMD_DC_DCN2_0
// Allocate memory for the vm_helper
dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL);
+   if (!dc->vm_helper) {
+   dm_error("%s: failed to create dc->vm_helper\n", __func__);
+   goto fail;
+   }
 
 #endif
memcpy(&dc->bb_overrides, &init_params->bb_overrides, 
sizeof(dc->bb_overrides));
-- 
2.9.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH hmm 00/15] Consolidate the mmu notifier interval_tree and locking

2019-10-24 Thread Christoph Hellwig
On Wed, Oct 23, 2019 at 05:24:45PM +, Jason Gunthorpe wrote:
> mlx5 is similar, but not currently coded quite right, there is one
> lock that protects the command queue for submitting invalidations to
> the HW and it doesn't make a lot of sense to have additional fine
> grained locking beyond that.

IFF all drivers could agree on a lock type (rw_semaphore?) for this
protection you could add a pointer to the range which would clear
things up a lot.  I'm just not sure you could get everyone to
agree.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amd/powerplay: modify the parameters of SMU_MSG_PowerUpVcn to 0

2019-10-24 Thread chen gong
The parameter that SMU_MSG_PowerUpVcn needs is 0, not 1

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c 
b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index 45c5f54..4a97519 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -282,7 +282,7 @@ static int renoir_dpm_set_uvd_enable(struct smu_context 
*smu, bool enable)
if (enable) {
/* vcn dpm on is a prerequisite for vcn power gate messages */
if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) {
-   ret = smu_send_smc_msg_with_param(smu, 
SMU_MSG_PowerUpVcn, 1);
+   ret = smu_send_smc_msg_with_param(smu, 
SMU_MSG_PowerUpVcn, 0);
if (ret)
return ret;
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amd/powerplay: modify the parameters of SMU_MSG_PowerUpVcn to 0

2019-10-24 Thread Liu, Aaron
Reviewed-by: Aaron Liu 

BR,
Aaron Liu

> -Original Message-
> From: amd-gfx  On Behalf Of chen
> gong
> Sent: Thursday, October 24, 2019 4:59 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Gong, Curry 
> Subject: [PATCH] drm/amd/powerplay: modify the parameters of
> SMU_MSG_PowerUpVcn to 0
> 
> The parameters what SMU_MSG_PowerUpVcn need is 0, not 1
> 
> Signed-off-by: chen gong 
> ---
>  drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> index 45c5f54..4a97519 100644
> --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> @@ -282,7 +282,7 @@ static int renoir_dpm_set_uvd_enable(struct
> smu_context *smu, bool enable)
>   if (enable) {
>   /* vcn dpm on is a prerequisite for vcn power gate messages
> */
>   if (smu_feature_is_enabled(smu,
> SMU_FEATURE_VCN_PG_BIT)) {
> - ret = smu_send_smc_msg_with_param(smu,
> SMU_MSG_PowerUpVcn, 1);
> + ret = smu_send_smc_msg_with_param(smu,
> SMU_MSG_PowerUpVcn, 0);
>   if (ret)
>   return ret;
>   }
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

2019-10-24 Thread Tianci Yin
From: "Tianci.Yin" 

update registers: mmCGTT_SPI_CLK_CTRL

Change-Id: Ib2539aae1fb0d001278b7f89c90ad6296f9fb85f
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 11e863c4c40b..22d0fade9c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -140,7 +140,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x, 
0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0x8fff, 
0x8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0x0fff, 
0x0100),
-   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc000, 
0xc100),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd00, 
0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 
0x6100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x4ff0, 
0x4100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0x8fff, 
0x8100),
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu/gfx10: update gfx golden settings

2019-10-24 Thread Tianci Yin
From: "Tianci.Yin" 

update registers: mmCGTT_SPI_CLK_CTRL

Change-Id: Ic64d532c61adfdeb681903f1133d9b353579ac55
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ac43b1af69e3..11e863c4c40b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -93,7 +93,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1[] =
 {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x, 
0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 
0xf8000100),
-   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc000, 
0xc100),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd00, 
0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x6ff0, 
0x6100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x4000, 
0x4100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0x8fff, 
0x8100),
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu/gfx10: update gfx golden settings for navi12

2019-10-24 Thread Tianci Yin
From: "Tianci.Yin" 

update registers: mmCGTT_SPI_CLK_CTRL

Change-Id: I35fb25be1340d8c062e0e5bfff642009a00d52cf
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 22d0fade9c71..d126d66cb781 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -179,7 +179,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 
0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0x8fff, 
0x8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0x0fff, 
0x0100),
-   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 
0xc100),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 
0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xcfff, 
0x6100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x0fff, 
0x4100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0x8fff, 
0x8100),
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

2019-10-24 Thread Xu, Feifei
Series is reviewed-by: Feifei Xu 

Thanks,
Feifei

-Original Message-
From: amd-gfx  On Behalf Of Tianci Yin
Sent: Thursday, October 24, 2019 6:10 PM
To: amd-gfx@lists.freedesktop.org
Cc: Xu, Feifei ; Xiao, Jack ; Yuan, 
Xiaojie ; Yin, Tianci (Rico) ; Zhang, 
Hawking 
Subject: [PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

From: "Tianci.Yin" 

update registers: mmCGTT_SPI_CLK_CTRL

Change-Id: Ib2539aae1fb0d001278b7f89c90ad6296f9fb85f
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 11e863c4c40b..22d0fade9c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -140,7 +140,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x, 
0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0x8fff, 
0x8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0x0fff, 
0x0100),
-   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc000, 
0xc100),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd00, 
0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 
0x6100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x4ff0, 
0x4100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0x8fff, 
0x8100),
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: use the parent resv for ghost objects v2

2019-10-24 Thread Christian König

Ping?

Am 18.10.19 um 13:58 schrieb Christian König:

This way the TTM is destroyed with the correct dma_resv object
locked and we can even pipeline imported BO evictions.

v2: Limit this to only cases when the parent object uses a separate
 reservation object as well. This fixes another OOM problem.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/ttm/ttm_bo_util.c | 16 +---
  1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index e030c27f53cf..45e440f80b7b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -512,7 +512,9 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
-   fbo->base.base.resv = &fbo->base.base._resv;
+   if (bo->base.resv == &bo->base._resv)
+   fbo->base.base.resv = &fbo->base.base._resv;
+
dma_resv_init(fbo->base.base.resv);
ret = dma_resv_trylock(fbo->base.base.resv);
WARN_ON(!ret);
@@ -711,7 +713,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
if (ret)
return ret;
  
-		dma_resv_add_excl_fence(ghost_obj->base.resv, fence);

+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
  
  		/**

 * If we're not moving to fixed memory, the TTM object
@@ -724,7 +726,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
  
-		ttm_bo_unreserve(ghost_obj);

+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
}
  
@@ -767,7 +769,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,

if (ret)
return ret;
  
-		dma_resv_add_excl_fence(ghost_obj->base.resv, fence);

+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
  
  		/**

 * If we're not moving to fixed memory, the TTM object
@@ -780,7 +782,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
  
-		ttm_bo_unreserve(ghost_obj);

+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
  
  	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {

@@ -836,7 +838,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
if (ret)
return ret;
  
-	ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);

+   ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
/* Last resort, wait for the BO to be idle when we are OOM */
if (ret)
ttm_bo_wait(bo, false, false);
@@ -845,7 +847,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
bo->mem.mem_type = TTM_PL_SYSTEM;
bo->ttm = NULL;
  
-	ttm_bo_unreserve(ghost);

+   dma_resv_unlock(&ghost->base._resv);
ttm_bo_put(ghost);
  
  	return 0;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

2019-10-24 Thread Yin, Tianci (Rico)
Thanks Feifei!

From: Xu, Feifei 
Sent: Thursday, October 24, 2019 18:12
To: Yin, Tianci (Rico) ; amd-gfx@lists.freedesktop.org 

Cc: Xiao, Jack ; Yuan, Xiaojie ; Yin, 
Tianci (Rico) ; Zhang, Hawking 
Subject: RE: [PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

Series is reviewed-by: Feifei Xu 

Thanks,
Feifei

-Original Message-
From: amd-gfx  On Behalf Of Tianci Yin
Sent: Thursday, October 24, 2019 6:10 PM
To: amd-gfx@lists.freedesktop.org
Cc: Xu, Feifei ; Xiao, Jack ; Yuan, 
Xiaojie ; Yin, Tianci (Rico) ; Zhang, 
Hawking 
Subject: [PATCH 2/3] drm/amdgpu/gfx10: update gfx golden settings for navi14

From: "Tianci.Yin" 

update registers: mmCGTT_SPI_CLK_CTRL

Change-Id: Ib2539aae1fb0d001278b7f89c90ad6296f9fb85f
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 11e863c4c40b..22d0fade9c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -140,7 +140,7 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_1[] =
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x, 
0x003c0014),
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0x8fff, 
0x8100),
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0x0fff, 
0x0100),
-   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc000, 
0xc100),
+   SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd00, 
0x0d000100),
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 
0x6100),
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x4ff0, 
0x4100),
 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0x8fff, 
0x8100),
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu/powerplay: add one flag to show that no one message be sent yet by SMU

2019-10-24 Thread chen gong
The value of the register mmMP1_SMN_C2PMSG_90 should be 0 when
initializing smu and after resuming smu.

Signed-off-by: chen gong 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c |  3 ++-
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  1 +
 drivers/gpu/drm/amd/powerplay/smu_v12_0.c  | 15 ++-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 3ce01e1..d93040b 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -738,6 +738,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
+   smu->not_yet_sent_one_msg = true;
mutex_init(&smu->mutex);
 
return smu_set_funcs(adev);
@@ -1381,7 +1382,7 @@ static int smu_resume(void *handle)
smu_set_gfx_cgpg(&adev->smu, true);
 
smu->disable_uclk_switch = 0;
-
+   smu->not_yet_sent_one_msg = true;
pr_info("SMU is resumed successfully!\n");
 
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 8120e75..1c03163 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -388,6 +388,7 @@ struct smu_context
uint32_t default_power_profile_mode;
bool pm_enabled;
bool is_apu;
+   bool not_yet_sent_one_msg;
 
uint32_t smc_if_version;
 
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c 
b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index 139dd73..2199a39 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -113,10 +113,11 @@ smu_v12_0_send_msg_with_param(struct smu_context *smu, 
uint16_t msg,
if (index < 0)
return index;
 
-   ret = smu_v12_0_wait_for_response(smu);
-   if (ret)
-   pr_err("Failed to send message 0x%x, response 0x%x, param 
0x%x\n",
-  index, ret, param);
+   if(!smu->not_yet_sent_one_msg){
+   ret = smu_v12_0_wait_for_response(smu);
+   if (ret)
+   pr_err("Failed to send message 0x%x, response 0x%x, 
param 0x%x\n",index, ret, param);
+   }
 
WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
@@ -125,9 +126,13 @@ smu_v12_0_send_msg_with_param(struct smu_context *smu, 
uint16_t msg,
smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index);
 
ret = smu_v12_0_wait_for_response(smu);
-   if (ret)
+   if (ret){
pr_err("Failed to send message 0x%x, response 0x%x param 
0x%x\n",
   index, ret, param);
+   }
+   else if(smu->not_yet_sent_one_msg){
+   smu->not_yet_sent_one_msg = false;
+   }
 
return ret;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: use the parent resv for ghost objects v2

2019-10-24 Thread Zhou, David(ChunMing)

On 2019/10/24 下午6:25, Christian König wrote:
> Ping?
>
> Am 18.10.19 um 13:58 schrieb Christian König:
>> This way the TTM is destroyed with the correct dma_resv object
>> locked and we can even pipeline imported BO evictions.
>>
>> v2: Limit this to only cases when the parent object uses a separate
>>  reservation object as well. This fixes another OOM problem.
>>
>> Signed-off-by: Christian König 
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo_util.c | 16 +---
>>   1 file changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
>> b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> index e030c27f53cf..45e440f80b7b 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> @@ -512,7 +512,9 @@ static int ttm_buffer_object_transfer(struct 
>> ttm_buffer_object *bo,
>>   kref_init(&fbo->base.kref);
>>   fbo->base.destroy = &ttm_transfered_destroy;
>>   fbo->base.acc_size = 0;
>> -    fbo->base.base.resv = &fbo->base.base._resv;
>> +    if (bo->base.resv == &bo->base._resv)
>> +    fbo->base.base.resv = &fbo->base.base._resv;
>> +
>>   dma_resv_init(fbo->base.base.resv);

Doesn't this lead to an issue if you force-initialize the parent resv? Otherwise, 
how do you deal with the case where parent->resv is locked?


>>   ret = dma_resv_trylock(fbo->base.base.resv);
>>   WARN_ON(!ret);
>> @@ -711,7 +713,7 @@ int ttm_bo_move_accel_cleanup(struct 
>> ttm_buffer_object *bo,
>>   if (ret)
>>   return ret;
>>   -    dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
>> +    dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
>>     /**
>>    * If we're not moving to fixed memory, the TTM object
>> @@ -724,7 +726,7 @@ int ttm_bo_move_accel_cleanup(struct 
>> ttm_buffer_object *bo,
>>   else
>>   bo->ttm = NULL;
>>   -    ttm_bo_unreserve(ghost_obj);
>> +    dma_resv_unlock(&ghost_obj->base._resv);

fbo->base.base.resv?

-David

>>   ttm_bo_put(ghost_obj);
>>   }
>>   @@ -767,7 +769,7 @@ int ttm_bo_pipeline_move(struct 
>> ttm_buffer_object *bo,
>>   if (ret)
>>   return ret;
>>   -    dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
>> +    dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
>>     /**
>>    * If we're not moving to fixed memory, the TTM object
>> @@ -780,7 +782,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object 
>> *bo,
>>   else
>>   bo->ttm = NULL;
>>   -    ttm_bo_unreserve(ghost_obj);
>> +    dma_resv_unlock(&ghost_obj->base._resv);
>>   ttm_bo_put(ghost_obj);
>>     } else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
>> @@ -836,7 +838,7 @@ int ttm_bo_pipeline_gutting(struct 
>> ttm_buffer_object *bo)
>>   if (ret)
>>   return ret;
>>   -    ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);
>> +    ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
>>   /* Last resort, wait for the BO to be idle when we are OOM */
>>   if (ret)
>>   ttm_bo_wait(bo, false, false);
>> @@ -845,7 +847,7 @@ int ttm_bo_pipeline_gutting(struct 
>> ttm_buffer_object *bo)
>>   bo->mem.mem_type = TTM_PL_SYSTEM;
>>   bo->ttm = NULL;
>>   -    ttm_bo_unreserve(ghost);
>> +    dma_resv_unlock(&ghost->base._resv);
>>   ttm_bo_put(ghost);
>>     return 0;
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: use the parent resv for ghost objects v2

2019-10-24 Thread Christian König

Am 24.10.19 um 12:51 schrieb Zhou, David(ChunMing):

On 2019/10/24 下午6:25, Christian König wrote:

Ping?

Am 18.10.19 um 13:58 schrieb Christian König:

This way the TTM is destroyed with the correct dma_resv object
locked and we can even pipeline imported BO evictions.

v2: Limit this to only cases when the parent object uses a separate
  reservation object as well. This fixes another OOM problem.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/ttm/ttm_bo_util.c | 16 +---
   1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index e030c27f53cf..45e440f80b7b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -512,7 +512,9 @@ static int ttm_buffer_object_transfer(struct
ttm_buffer_object *bo,
   kref_init(&fbo->base.kref);
   fbo->base.destroy = &ttm_transfered_destroy;
   fbo->base.acc_size = 0;
-    fbo->base.base.resv = &fbo->base.base._resv;
+    if (bo->base.resv == &bo->base._resv)
+    fbo->base.base.resv = &fbo->base.base._resv;
+
   dma_resv_init(fbo->base.base.resv);

Doesn't this lead to an issue if you force-initialize the parent resv? Otherwise,
how do you deal with the case where parent->resv is locked?


Oops, good point. That is indeed a really bad typo added during the 
rebase. Going to fix that.


Thanks,
Christian.





   ret = dma_resv_trylock(fbo->base.base.resv);
   WARN_ON(!ret);
@@ -711,7 +713,7 @@ int ttm_bo_move_accel_cleanup(struct
ttm_buffer_object *bo,
   if (ret)
   return ret;
   -    dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+    dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
     /**
    * If we're not moving to fixed memory, the TTM object
@@ -724,7 +726,7 @@ int ttm_bo_move_accel_cleanup(struct
ttm_buffer_object *bo,
   else
   bo->ttm = NULL;
   -    ttm_bo_unreserve(ghost_obj);
+    dma_resv_unlock(&ghost_obj->base._resv);

fbo->base.base.resv?

-David


   ttm_bo_put(ghost_obj);
   }
   @@ -767,7 +769,7 @@ int ttm_bo_pipeline_move(struct
ttm_buffer_object *bo,
   if (ret)
   return ret;
   -    dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+    dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
     /**
    * If we're not moving to fixed memory, the TTM object
@@ -780,7 +782,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object
*bo,
   else
   bo->ttm = NULL;
   -    ttm_bo_unreserve(ghost_obj);
+    dma_resv_unlock(&ghost_obj->base._resv);
   ttm_bo_put(ghost_obj);
     } else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
@@ -836,7 +838,7 @@ int ttm_bo_pipeline_gutting(struct
ttm_buffer_object *bo)
   if (ret)
   return ret;
   -    ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);
+    ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
   /* Last resort, wait for the BO to be idle when we are OOM */
   if (ret)
   ttm_bo_wait(bo, false, false);
@@ -845,7 +847,7 @@ int ttm_bo_pipeline_gutting(struct
ttm_buffer_object *bo)
   bo->mem.mem_type = TTM_PL_SYSTEM;
   bo->ttm = NULL;
   -    ttm_bo_unreserve(ghost);
+    dma_resv_unlock(&ghost->base._resv);
   ttm_bo_put(ghost);
     return 0;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-24 Thread S, Shirish
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
boolin_gpu_reset;
enum pp_mp1_state   mp1_state;
struct mutex  lock_reset;
+   struct mutex  lock_ib_sched;
struct amdgpu_doorbell_index doorbell_index;
 
int asic_reset_res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
+   mutex_init(&adev->lock_ib_sched);
mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
 
@@ -3795,7 +3796,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (r)
return r;
 
+   mutex_lock(&tmp_adev->lock_ib_sched);
r = amdgpu_device_ip_resume_phase2(tmp_adev);
+   mutex_unlock(&tmp_adev->lock_ib_sched);
if (r)
goto out;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e1bad99..cd6082d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -233,8 +233,10 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
} else {
+   mutex_lock(&ring->adev->lock_ib_sched);
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
   &fence);
+   mutex_unlock(&ring->adev->lock_ib_sched);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-24 Thread Christian König

Am 24.10.19 um 12:58 schrieb S, Shirish:

[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.


NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Please sync up with Andrey how this was able to happen.

Regards,
Christian.



[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
  3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
boolin_gpu_reset;
enum pp_mp1_state   mp1_state;
struct mutex  lock_reset;
+   struct mutex  lock_ib_sched;
struct amdgpu_doorbell_index doorbell_index;
  
  	int asic_reset_res;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
+   mutex_init(&adev->lock_ib_sched);
mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
  
@@ -3795,7 +3796,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,

if (r)
return r;
  
+mutex_lock(&tmp_adev->lock_ib_sched);

r = amdgpu_device_ip_resume_phase2(tmp_adev);
+   mutex_unlock(&tmp_adev->lock_ib_sched);
if (r)
goto out;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index e1bad99..cd6082d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -233,8 +233,10 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
} else {
+   mutex_lock(&ring->adev->lock_ib_sched);
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
   &fence);
+   mutex_unlock(&ring->adev->lock_ib_sched);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/ttm: use the parent resv for ghost objects v3

2019-10-24 Thread Christian König
This way the TTM is destroyed with the correct dma_resv object
locked and we can even pipeline imported BO evictions.

v2: Limit this to only cases when the parent object uses a separate
reservation object as well. This fixes another OOM problem.
v3: fix init and try_lock on the wrong object

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 73a1b0186029..f7b57ca1a95b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -516,9 +516,11 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
-   fbo->base.base.resv = &fbo->base.base._resv;
-   dma_resv_init(fbo->base.base.resv);
-   ret = dma_resv_trylock(fbo->base.base.resv);
+   if (bo->base.resv == &bo->base._resv)
+   fbo->base.base.resv = &fbo->base.base._resv;
+
+   dma_resv_init(&fbo->base.base._resv);
+   ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
 
*new_obj = &fbo->base;
@@ -715,7 +717,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
 
/**
 * If we're not moving to fixed memory, the TTM object
@@ -728,7 +730,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
}
 
@@ -771,7 +773,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
+   dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
 
/**
 * If we're not moving to fixed memory, the TTM object
@@ -784,7 +786,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
+   dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
 
} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
@@ -840,7 +842,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
if (ret)
return ret;
 
-   ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);
+   ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
/* Last resort, wait for the BO to be idle when we are OOM */
if (ret)
ttm_bo_wait(bo, false, false);
@@ -849,7 +851,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
bo->mem.mem_type = TTM_PL_SYSTEM;
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost);
+   dma_resv_unlock(&ghost->base._resv);
ttm_bo_put(ghost);
 
return 0;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Cleanup: replace prefered with preferred

2019-10-24 Thread Jani Nikula
On Wed, 23 Oct 2019, Mark Salyzyn  wrote:
> I will split this between pure and inert documentation/comments for now, 
> with a followup later for the code portion which understandably is more 
> controversial.

Please split by driver/subsystem too, and it'll be all around much
easier for everyone.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: use the parent resv for ghost objects v3

2019-10-24 Thread Daniel Vetter
On Thu, Oct 24, 2019 at 01:16:32PM +0200, Christian König wrote:
> This way the TTM is destroyed with the correct dma_resv object
> locked and we can even pipeline imported BO evictions.
> 
> v2: Limit this to only cases when the parent object uses a separate
> reservation object as well. This fixes another OOM problem.
> v3: fix init and try_lock on the wrong object

Uh this makes a lot more sense with that fixed :-)

Now even feeling bold enough for a

Reviewed-by: Daniel Vetter 

> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
> b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 73a1b0186029..f7b57ca1a95b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -516,9 +516,11 @@ static int ttm_buffer_object_transfer(struct 
> ttm_buffer_object *bo,
>   kref_init(&fbo->base.kref);
>   fbo->base.destroy = &ttm_transfered_destroy;
>   fbo->base.acc_size = 0;
> - fbo->base.base.resv = &fbo->base.base._resv;
> - dma_resv_init(fbo->base.base.resv);
> - ret = dma_resv_trylock(fbo->base.base.resv);
> + if (bo->base.resv == &bo->base._resv)
> + fbo->base.base.resv = &fbo->base.base._resv;
> +
> + dma_resv_init(&fbo->base.base._resv);
> + ret = dma_resv_trylock(&fbo->base.base._resv);
>   WARN_ON(!ret);
>  
>   *new_obj = &fbo->base;
> @@ -715,7 +717,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object 
> *bo,
>   if (ret)
>   return ret;
>  
> - dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
> + dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
>  
>   /**
>* If we're not moving to fixed memory, the TTM object
> @@ -728,7 +730,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object 
> *bo,
>   else
>   bo->ttm = NULL;
>  
> - ttm_bo_unreserve(ghost_obj);
> + dma_resv_unlock(&ghost_obj->base._resv);
>   ttm_bo_put(ghost_obj);
>   }
>  
> @@ -771,7 +773,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
>   if (ret)
>   return ret;
>  
> - dma_resv_add_excl_fence(ghost_obj->base.resv, fence);
> + dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
>  
>   /**
>* If we're not moving to fixed memory, the TTM object
> @@ -784,7 +786,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
>   else
>   bo->ttm = NULL;
>  
> - ttm_bo_unreserve(ghost_obj);
> + dma_resv_unlock(&ghost_obj->base._resv);
>   ttm_bo_put(ghost_obj);
>  
>   } else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
> @@ -840,7 +842,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
>   if (ret)
>   return ret;
>  
> - ret = dma_resv_copy_fences(ghost->base.resv, bo->base.resv);
> + ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
>   /* Last resort, wait for the BO to be idle when we are OOM */
>   if (ret)
>   ttm_bo_wait(bo, false, false);
> @@ -849,7 +851,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
>   bo->mem.mem_type = TTM_PL_SYSTEM;
>   bo->ttm = NULL;
>  
> - ttm_bo_unreserve(ghost);
> + dma_resv_unlock(&ghost->base._resv);
>   ttm_bo_put(ghost);
>  
>   return 0;
> -- 
> 2.17.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amd/powerplay: correct current clock level label for Arcturus

2019-10-24 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx  On Behalf Of
> Quan, Evan
> Sent: Thursday, October 24, 2019 2:25 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Li, Candice ; Quan, Evan 
> Subject: [PATCH] drm/amd/powerplay: correct current clock level label for
> Arcturus
> 
> For the DPM disabled case, it's assumed that the only supported clock level
> is always the current clock level.
> 
> Change-Id: I5cc2b7e82af888dc5e8268597ee761e9e1a26855
> Signed-off-by: Evan Quan 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 24 +--
> -
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> index 1bcc5ab2873d..fcc9b6d24e8e 100644
> --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> @@ -670,12 +670,17 @@ static int arcturus_print_clk_levels(struct
> smu_context *smu,
>   return ret;
>   }
> 
> + /*
> +  * For DPM disabled case, there will be only one clock level.
> +  * And it's safe to assume that is always the current clock.
> +  */
>   for (i = 0; i < clocks.num_levels; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n", i,
>   clocks.data[i].clocks_in_khz / 1000,
> - arcturus_freqs_in_same_level(
> + (clocks.num_levels == 1) ? "*" :
> + (arcturus_freqs_in_same_level(
>   clocks.data[i].clocks_in_khz / 1000,
> - now / 100) ? "*" : "");
> + now / 100) ? "*" : ""));
>   break;
> 
>   case SMU_MCLK:
> @@ -695,9 +700,10 @@ static int arcturus_print_clk_levels(struct
> smu_context *smu,
>   for (i = 0; i < clocks.num_levels; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n",
>   i, clocks.data[i].clocks_in_khz / 1000,
> - arcturus_freqs_in_same_level(
> + (clocks.num_levels == 1) ? "*" :
> + (arcturus_freqs_in_same_level(
>   clocks.data[i].clocks_in_khz / 1000,
> - now / 100) ? "*" : "");
> + now / 100) ? "*" : ""));
>   break;
> 
>   case SMU_SOCCLK:
> @@ -717,9 +723,10 @@ static int arcturus_print_clk_levels(struct
> smu_context *smu,
>   for (i = 0; i < clocks.num_levels; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n",
>   i, clocks.data[i].clocks_in_khz / 1000,
> - arcturus_freqs_in_same_level(
> + (clocks.num_levels == 1) ? "*" :
> + (arcturus_freqs_in_same_level(
>   clocks.data[i].clocks_in_khz / 1000,
> - now / 100) ? "*" : "");
> + now / 100) ? "*" : ""));
>   break;
> 
>   case SMU_FCLK:
> @@ -739,9 +746,10 @@ static int arcturus_print_clk_levels(struct
> smu_context *smu,
>   for (i = 0; i < single_dpm_table->count; i++)
>   size += sprintf(buf + size, "%d: %uMhz %s\n",
>   i, single_dpm_table->dpm_levels[i].value,
> - arcturus_freqs_in_same_level(
> + (clocks.num_levels == 1) ? "*" :
> + (arcturus_freqs_in_same_level(
>   clocks.data[i].clocks_in_khz / 1000,
> - now / 100) ? "*" : "");
> + now / 100) ? "*" : ""));
>   break;
> 
>   default:
> --
> 2.23.0
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Ring range argument improvements (v2)

2019-10-24 Thread StDenis, Tom
NAK, this patch breaks something in decoding the words of the packets.  
Instead of decoding I get a lot of PKT0 lines.  I'll see if I can debug 
this.


Tom

On 2019-10-23 5:11 p.m., Tuikov, Luben wrote:
> The valid contents of rings is invariably the
> range [rptr, wptr). Augment the ring range to
> interpret the '.' ("here") notation to mean rptr
> or wptr when given on the left or right of the
> range limits. This augments the range notation as
> follows:
>
> 1) No range given, print the whole ring.
>
> 2) [.] or [.:.], print [rptr, wptr],
>
> 3) [.:k], k >= 0, print [rptr, rptr + k], this is
> a range relative to the left limit, rptr, the
> consumer pointer.
>
> 4) [k:.] or [k], k >= 0, print [wptr - k, wptr], this is
> a range relative to the right limit, wptr, the
> producer pointer.
>
> 5) [k:r], both greater than 0, print [k,r] of the
> named ring. This is an absolute range limit
> notation.
>
> In any case, the ring contents is interpreted, if
> the ring contents can be interpreted.
>
> v2: Fix spelling mistake in the commit message:
> "then" --> "than".
>
> Signed-off-by: Luben Tuikov 
> ---
>   doc/umr.1   | 33 +++-
>   src/app/ring_read.c | 52 -
>   2 files changed, 55 insertions(+), 30 deletions(-)
>
> diff --git a/doc/umr.1 b/doc/umr.1
> index 1e585fa..137ff1e 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -216,17 +216,30 @@ Disassemble 'size' bytes (in hex) from a given address 
> (in hex).  The size can b
>   specified as zero to have umr try and compute the shader size.
>   
>   .SH Ring and PM4 Decoding
> -.IP "--ring, -R (from:to)"
> -Read the contents of a ring named by the string without the
> -.B amdgpu_ring_
> -prefix.  By default it will read and display the entire ring.  A
> -starting and ending address can be specified in decimal or a '.' can
> -be used to indicate relative to the current
> +.IP "--ring, -R [range]"
> +Read the contents of the ring named by the string
> +.B amdgpu_ring_,
> +i.e. without the
> +.B amdgpu_ring
> +prefix. By default it reads and prints the entire ring.  A
> +range is optional and has the format '[start:end]'. The
> +starting and ending address are non-negative integers or
> +the '.' (dot) symbol, which indicates the
> +.B rptr
> +when on the left side and
>   .B wptr
> -pointer.  For example, "-R gfx" would read the entire gfx ring,
> -"-R gfx[0:16]" would display the contents from 0 to 16 inclusively, and
> -"-R gfx[.]" or "-R gfx[.:.]" would display the last 32 words relative
> -to rptr.
> +when on the right side of the range.
> +For instance,
> +"-R gfx" prints the entire gfx ring, "-R gfx[0:16]" prints
> +the contents from 0 to 16 inclusively, and "-R gfx[.]" or
> +"-R gfx[.:.]" prints the range [rptr,wptr]. When one of
> +the range limits is a number while the other is the dot, '.',
> +then the number indicates the relative range before or after the
> +corresponding ring pointer. For instance, "-R sdma0[16:.]"
> +prints [wptr-16, wptr] words of the SDMA0 ring, and
> +"-R sdma1[.:32]" prints [rptr, rptr+32] double-words of the
> +SDMA1 ring. The contents of the ring is always interpreted,
> +if it can be interpreted.
>   .IP "--dump-ib, -di [vmid@]address length [pm]"
>   Dump an IB packet at an address with an optional VMID.  The length is 
> specified
>   in bytes.  The type of decoder  is optional and defaults to PM4 packets.
> diff --git a/src/app/ring_read.c b/src/app/ring_read.c
> index ef0c711..9cbecb0 100644
> --- a/src/app/ring_read.c
> +++ b/src/app/ring_read.c
> @@ -28,7 +28,7 @@
>   void umr_read_ring(struct umr_asic *asic, char *ringpath)
>   {
>   char ringname[32], from[32], to[32];
> - int use_decoder, enable_decoder, gprs;
> + int  enable_decoder, gprs;
>   uint32_t wptr, rptr, drv_wptr, ringsize, start, end, value,
>*ring_data;
>   struct umr_ring_decoder decoder, *pdecoder, *ppdecoder;
> @@ -73,33 +73,46 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath)
>   drv_wptr = ring_data[2]<<2;
>   
>   /* default to reading entire ring */
> - use_decoder = 0;
>   if (!from[0]) {
>   start = 0;
>   end   = ringsize-4;
>   } else {
> - if (from[0] == '.' || !to[0] || to[0] == '.') {
> - /* start from 32 words prior to rptr up to wptr */
> - end = wptr;
> - if (rptr < (31*4)) {
> - start = rptr - 31*4;
> - start += ringsize;
> + if (from[0] == '.') {
> + if (to[0] == 0 || to[0] == '.') {
> + /* Notation: [.] or [.:.], meaning
> +  * [rptr, wptr].
> +  */
> + start = rptr;
> + end = wptr;
>   } else {
> - start = rptr - 31*4;
> +

Re: [PATCH] Ring range argument improvements (v2)

2019-10-24 Thread StDenis, Tom
This diff fixes your patch, can you resend please?


diff --git a/src/app/ring_read.c b/src/app/ring_read.c
index 9cbecb0..c1c9187 100644
--- a/src/app/ring_read.c
+++ b/src/app/ring_read.c
@@ -120,16 +120,16 @@ void umr_read_ring(struct umr_asic *asic, char 
*ringpath)
     asic->asicname, ringname, (unsigned long)wptr >> 2,
     asic->asicname, ringname, (unsigned long)drv_wptr >> 2);

+   if (enable_decoder) {
+   decoder.pm4.cur_opcode = 0x;
+   decoder.sdma.cur_opcode = 0x;
+   }
     do {
     value = ring_data[(start+12)>>2];
     printf("%s.%s.ring[%s%4lu%s] == %s0x%08lx%s   ",
     asic->asicname, ringname,
     BLUE, (unsigned long)start >> 2, RST,
     YELLOW, (unsigned long)value, RST);
-   if (enable_decoder) {
-   decoder.pm4.cur_opcode = 0x;
-   decoder.sdma.cur_opcode = 0x;
-   }
     printf(" %c%c%c ",
     (start == rptr) ? 'r' : '.',
     (start == wptr) ? 'w' : '.',

On 2019-10-23 5:11 p.m., Tuikov, Luben wrote:
> The valid contents of rings is invariably the
> range [rptr, wptr). Augment the ring range to
> interpret the '.' ("here") notation to mean rptr
> or wptr when given on the left or right of the
> range limits. This augments the range notation as
> follows:
>
> 1) No range given, print the whole ring.
>
> 2) [.] or [.:.], print [rptr, wptr],
>
> 3) [.:k], k >= 0, print [rptr, rptr + k], this is
> a range relative to the left limit, rptr, the
> consumer pointer.
>
> 4) [k:.] or [k], k >= 0, print [wptr - k, wptr], this is
> a range relative to the right limit, wptr, the
> producer pointer.
>
> 5) [k:r], both greater than 0, print [k,r] of the
> named ring. This is an absolute range limit
> notation.
>
> In any case, the ring contents is interpreted, if
> the ring contents can be interpreted.
>
> v2: Fix spelling mistake in the commit message:
> "then" --> "than".
>
> Signed-off-by: Luben Tuikov 
> ---
>   doc/umr.1   | 33 +++-
>   src/app/ring_read.c | 52 -
>   2 files changed, 55 insertions(+), 30 deletions(-)
>
> diff --git a/doc/umr.1 b/doc/umr.1
> index 1e585fa..137ff1e 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -216,17 +216,30 @@ Disassemble 'size' bytes (in hex) from a given address 
> (in hex).  The size can b
>   specified as zero to have umr try and compute the shader size.
>   
>   .SH Ring and PM4 Decoding
> -.IP "--ring, -R (from:to)"
> -Read the contents of a ring named by the string without the
> -.B amdgpu_ring_
> -prefix.  By default it will read and display the entire ring.  A
> -starting and ending address can be specified in decimal or a '.' can
> -be used to indicate relative to the current
> +.IP "--ring, -R [range]"
> +Read the contents of the ring named by the string
> +.B amdgpu_ring_,
> +i.e. without the
> +.B amdgpu_ring
> +prefix. By default it reads and prints the entire ring.  A
> +range is optional and has the format '[start:end]'. The
> +starting and ending address are non-negative integers or
> +the '.' (dot) symbol, which indicates the
> +.B rptr
> +when on the left side and
>   .B wptr
> -pointer.  For example, "-R gfx" would read the entire gfx ring,
> -"-R gfx[0:16]" would display the contents from 0 to 16 inclusively, and
> -"-R gfx[.]" or "-R gfx[.:.]" would display the last 32 words relative
> -to rptr.
> +when on the right side of the range.
> +For instance,
> +"-R gfx" prints the entire gfx ring, "-R gfx[0:16]" prints
> +the contents from 0 to 16 inclusively, and "-R gfx[.]" or
> +"-R gfx[.:.]" prints the range [rptr,wptr]. When one of
> +the range limits is a number while the other is the dot, '.',
> +then the number indicates the relative range before or after the
> +corresponding ring pointer. For instance, "-R sdma0[16:.]"
> +prints [wptr-16, wptr] words of the SDMA0 ring, and
> +"-R sdma1[.:32]" prints [rptr, rptr+32] double-words of the
> +SDMA1 ring. The contents of the ring is always interpreted,
> +if it can be interpreted.
>   .IP "--dump-ib, -di [vmid@]address length [pm]"
>   Dump an IB packet at an address with an optional VMID.  The length is 
> specified
>   in bytes.  The type of decoder  is optional and defaults to PM4 packets.
> diff --git a/src/app/ring_read.c b/src/app/ring_read.c
> index ef0c711..9cbecb0 100644
> --- a/src/app/ring_read.c
> +++ b/src/app/ring_read.c
> @@ -28,7 +28,7 @@
>   void umr_read_ring(struct umr_asic *asic, char *ringpath)
>   {
>   char ringname[32], from[32], to[32];
> - int use_decoder, enable_decoder, gprs;
> + int  enable_decoder, gprs;
>   uint32_t wptr, rptr, drv_wptr, ringsize, start, end, value,
>*ring_data;
>   struct umr_ring_decoder decoder, *pdecoder, *ppdecoder;
> @@ -73,33 +73,4

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-24 Thread Grodzovsky, Andrey

On 10/24/19 7:01 AM, Christian König wrote:
Am 24.10.19 um 12:58 schrieb S, Shirish:
[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.

NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Please sync up with Andrey how this was able to happen.

Regards,
Christian.


Shirish - Christian makes a good point - note that in amdgpu_device_gpu_recover,
drm_sched_stop, which stops all the scheduler threads, is called way before we
suspend the HW in amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend, where
the SDMA suspension happens and where the HW ring is marked as not ready. Please
provide the call stack for when you hit [drm:amdgpu_job_run] *ERROR* Error
scheduling IBs (-22) to identify the code path which tried to submit the SDMA IB.

Andrey



[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 2 ++
  3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
  boolin_gpu_reset;
  enum pp_mp1_state   mp1_state;
  struct mutex  lock_reset;
+struct mutex  lock_ib_sched;
  struct amdgpu_doorbell_index doorbell_index;
int asic_reset_res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  mutex_init(&adev->virt.vf_errors.lock);
  hash_init(adev->mn_hash);
  mutex_init(&adev->lock_reset);
+mutex_init(&adev->lock_ib_sched);
  mutex_init(&adev->virt.dpm_mutex);
  mutex_init(&adev->psp.mutex);
  @@ -3795,7 +3796,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
  if (r)
  return r;
  +mutex_lock(&tmp_adev->lock_ib_sched);
  r = amdgpu_device_ip_resume_phase2(tmp_adev);
+mutex_unlock(&tmp_adev->lock_ib_sched);
  if (r)
  goto out;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e1bad99..cd6082d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -233,8 +233,10 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
  if (finished->error < 0) {
  DRM_INFO("Skip scheduling IBs!\n");
  } else {
+mutex_lock(&ring->adev->lock_ib_sched);
  r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
 &fence);
+mutex_unlock(&ring->adev->lock_ib_sched);
  if (r)
  DRM_ERROR("Error scheduling IBs (%d)\n", r);
  }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: guard ib scheduling while in reset

2019-10-24 Thread Christian König

Am 24.10.19 um 17:06 schrieb Grodzovsky, Andrey:



On 10/24/19 7:01 AM, Christian König wrote:

Am 24.10.19 um 12:58 schrieb S, Shirish:

[Why]
Upon GPU reset, kernel cleans up already submitted jobs
via drm_sched_cleanup_jobs.
This schedules ib's via drm_sched_main()->run_job, leading to
race condition of rings being ready or not, since during reset
rings may be suspended.


NAK, exactly that's what should not happen.

The scheduler should be suspended while a GPU reset is in progress.

So you are running into a completely different race here.

Please sync up with Andrey how this was able to happen.

Regards,
Christian.



Shirish - Christian makes a good point - note that in
amdgpu_device_gpu_recover, drm_sched_stop, which stops all the scheduler
threads, is called way before we suspend the HW in
amdgpu_device_pre_asic_reset->amdgpu_device_ip_suspend, where the SDMA
suspension happens and where the HW ring is marked as not ready. Please
provide the call stack for when you hit [drm:amdgpu_job_run]
*ERROR* Error scheduling IBs (-22) to identify the code path which
tried to submit the SDMA IB.




Well the most likely cause of this is that the hardware failed to resume 
after the reset.


Christian.


Andrey






[How]
make GPU reset's amdgpu_device_ip_resume_phase2() &
amdgpu_ib_schedule() in amdgpu_job_run() mutually exclusive.

Signed-off-by: Shirish S 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 2 ++
  3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index f4d9041..7b07a47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -973,6 +973,7 @@ struct amdgpu_device {
  bool    in_gpu_reset;
  enum pp_mp1_state   mp1_state;
  struct mutex  lock_reset;
+    struct mutex  lock_ib_sched;
  struct amdgpu_doorbell_index doorbell_index;
    int asic_reset_res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 676cad1..63cad74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2759,6 +2759,7 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  mutex_init(&adev->virt.vf_errors.lock);
  hash_init(adev->mn_hash);
  mutex_init(&adev->lock_reset);
+    mutex_init(&adev->lock_ib_sched);
  mutex_init(&adev->virt.dpm_mutex);
  mutex_init(&adev->psp.mutex);
  @@ -3795,7 +3796,9 @@ static int amdgpu_do_asic_reset(struct 
amdgpu_hive_info *hive,

  if (r)
  return r;
  + mutex_lock(&tmp_adev->lock_ib_sched);
  r = amdgpu_device_ip_resume_phase2(tmp_adev);
+ mutex_unlock(&tmp_adev->lock_ib_sched);
  if (r)
  goto out;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index e1bad99..cd6082d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -233,8 +233,10 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)

  if (finished->error < 0) {
  DRM_INFO("Skip scheduling IBs!\n");
  } else {
+    mutex_lock(&ring->adev->lock_ib_sched);
  r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
 &fence);
+    mutex_unlock(&ring->adev->lock_ib_sched);
  if (r)
  DRM_ERROR("Error scheduling IBs (%d)\n", r);
  }




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Spontaneous reboots when using RX 560

2019-10-24 Thread Alex Deucher
On Tue, Oct 22, 2019 at 7:01 AM Sylvain Munaut <246...@gmail.com> wrote:
>
> Hi All,
>
> More testing over the last few days showed that only either the lowest
> power mode, or slightly above can work. Oh, I also tested 5.4-rc3 just
> in case but same results.
> It doesn't seem to be affected by PCIe lane speed. Memory seems
> stable at 625M and almost at 1500M (only a sustained heavy workload
> eventually brings it down), but the SoC speed seems pretty touchy.
>
> So that would seem to confirm something is wrong either in the power
> play table itself, or its interpretation by the linux driver.
> I tried brute-loading some other RX570 pptable into it, but that
> didn't really do much. After writing it to pp_table, the card was
> stuck at its lower clock mode. Working fine, but same as if I had
> forced it to low power.
>
> Is there any way to extract the power play table from Windows, since
> it's running fine there?
> I'm kind of running out of ideas of what to try next.

Can you send me a copy of the vbios from that board?

(as root)
(use lspci to get the bus id)
cd /sys/bus/pci/devices/
echo 1 > rom
cat rom > /tmp/vbios.rom
echo 0 > rom
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Spontaneous reboots when using RX 560

2019-10-24 Thread Sylvain Munaut
Hi,

> Can you send me a copy of the vbios from that board?
>
> (as root)
> (use lspci to get the bus id)
> cd /sys/bus/pci/devices/
> echo 1 > rom
> cat rom > /tmp/vbios.rom
> echo 0 > rom

Sure, sent as private message.

Also, I got hold of a RX570 from another vendor and tested that. Works
fine, no crash even during stress tests / benchmarks.

Cheers,

Sylvain
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] Ring range argument improvements (v2)

2019-10-24 Thread Tuikov, Luben
Hi Tom,

Thanks for debugging it!

I'll reapply and resend it.

Regards,
Luben

On 2019-10-24 9:44 a.m., StDenis, Tom wrote:
> This diff fixes your patch, can you resend please?
> 
> 
> diff --git a/src/app/ring_read.c b/src/app/ring_read.c
> index 9cbecb0..c1c9187 100644
> --- a/src/app/ring_read.c
> +++ b/src/app/ring_read.c
> @@ -120,16 +120,16 @@ void umr_read_ring(struct umr_asic *asic, char 
> *ringpath)
>      asic->asicname, ringname, (unsigned long)wptr >> 2,
>      asic->asicname, ringname, (unsigned long)drv_wptr >> 2);
> 
> +   if (enable_decoder) {
> +   decoder.pm4.cur_opcode = 0x;
> +   decoder.sdma.cur_opcode = 0x;
> +   }
>      do {
>      value = ring_data[(start+12)>>2];
>      printf("%s.%s.ring[%s%4lu%s] == %s0x%08lx%s   ",
>      asic->asicname, ringname,
>      BLUE, (unsigned long)start >> 2, RST,
>      YELLOW, (unsigned long)value, RST);
> -   if (enable_decoder) {
> -   decoder.pm4.cur_opcode = 0x;
> -   decoder.sdma.cur_opcode = 0x;
> -   }
>      printf(" %c%c%c ",
>      (start == rptr) ? 'r' : '.',
>      (start == wptr) ? 'w' : '.',
> 
> On 2019-10-23 5:11 p.m., Tuikov, Luben wrote:
>> The valid contents of rings is invariably the
>> range [rptr, wptr). Augment the ring range to
>> interpret the '.' ("here") notation to mean rptr
>> or wptr when given on the left or right of the
>> range limits. This augments the range notation as
>> follows:
>>
>> 1) No range given, print the whole ring.
>>
>> 2) [.] or [.:.], print [rptr, wptr],
>>
>> 3) [.:k], k >= 0, print [rptr, rptr + k], this is
>> a range relative to the left limit, rptr, the
>> consumer pointer.
>>
>> 4) [k:.] or [k], k >= 0, print [wptr - k, wptr], this is
>> a range relative to the right limit, wptr, the
>> producer pointer.
>>
>> 5) [k:r], both greater than 0, print [k,r] of the
>> named ring. This is an absolute range limit
>> notation.
>>
>> In any case, the ring contents is interpreted, if
>> the ring contents can be interpreted.
>>
>> v2: Fix spelling mistake in the commit message:
>> "then" --> "than".
>>
>> Signed-off-by: Luben Tuikov 
>> ---
>>   doc/umr.1   | 33 +++-
>>   src/app/ring_read.c | 52 -
>>   2 files changed, 55 insertions(+), 30 deletions(-)
>>
>> diff --git a/doc/umr.1 b/doc/umr.1
>> index 1e585fa..137ff1e 100644
>> --- a/doc/umr.1
>> +++ b/doc/umr.1
>> @@ -216,17 +216,30 @@ Disassemble 'size' bytes (in hex) from a given address 
>> (in hex).  The size can b
>>   specified as zero to have umr try and compute the shader size.
>>   
>>   .SH Ring and PM4 Decoding
>> -.IP "--ring, -R (from:to)"
>> -Read the contents of a ring named by the string without the
>> -.B amdgpu_ring_
>> -prefix.  By default it will read and display the entire ring.  A
>> -starting and ending address can be specified in decimal or a '.' can
>> -be used to indicate relative to the current
>> +.IP "--ring, -R [range]"
>> +Read the contents of the ring named by the string
>> +.B amdgpu_ring_,
>> +i.e. without the
>> +.B amdgpu_ring
>> +prefix. By default it reads and prints the entire ring.  A
>> +range is optional and has the format '[start:end]'. The
>> +starting and ending address are non-negative integers or
>> +the '.' (dot) symbol, which indicates the
>> +.B rptr
>> +when on the left side and
>>   .B wptr
>> -pointer.  For example, "-R gfx" would read the entire gfx ring,
>> -"-R gfx[0:16]" would display the contents from 0 to 16 inclusively, and
>> -"-R gfx[.]" or "-R gfx[.:.]" would display the last 32 words relative
>> -to rptr.
>> +when on the right side of the range.
>> +For instance,
>> +"-R gfx" prints the entire gfx ring, "-R gfx[0:16]" prints
>> +the contents from 0 to 16 inclusively, and "-R gfx[.]" or
>> +"-R gfx[.:.]" prints the range [rptr,wptr]. When one of
>> +the range limits is a number while the other is the dot, '.',
>> +then the number indicates the relative range before or after the
>> +corresponding ring pointer. For instance, "-R sdma0[16:.]"
>> +prints [wptr-16, wptr] words of the SDMA0 ring, and
>> +"-R sdma1[.:32]" prints [rptr, rptr+32] double-words of the
>> +SDMA1 ring. The contents of the ring is always interpreted,
>> +if it can be interpreted.
>>   .IP "--dump-ib, -di [vmid@]address length [pm]"
>>   Dump an IB packet at an address with an optional VMID.  The length is 
>> specified
>>   in bytes.  The type of decoder  is optional and defaults to PM4 
>> packets.
>> diff --git a/src/app/ring_read.c b/src/app/ring_read.c
>> index ef0c711..9cbecb0 100644
>> --- a/src/app/ring_read.c
>> +++ b/src/app/ring_read.c
>> @@ -28,7 +28,7 @@
>>   void umr_read_ring(struct umr_asic *asic, char *ringpath)
>>   {
>>  char ringname[32]

[PATCH] Ring range argument improvements (v3)

2019-10-24 Thread Tuikov, Luben
The valid contents of rings is invariably the
range [rptr, wptr). Augment the ring range to
interpret the '.' ("here") notation to mean rptr
or wptr when given on the left or right of the
range limits. This augments the range notation as
follows:

1) No range given, print the whole ring.

2) [.] or [.:.], print [rptr, wptr],

3) [.:k], k >= 0, print [rptr, rptr + k], this is
a range relative to the left limit, rptr, the
consumer pointer.

4) [k:.] or [k], k >= 0, print [wptr - k, wptr], this is
a range relative to the right limit, wptr, the
producer pointer.

5) [k:r], both greater than 0, print [k,r] of the
named ring. This is an absolute range limit
notation.

In any case, the ring contents is interpreted, if
the ring contents can be interpreted.

v2: Fix spelling mistake in the commit message:
"then" --> "than".
v3: Fix decoding.

Signed-off-by: Luben Tuikov 
---
 doc/umr.1   | 33 +
 src/app/ring_read.c | 59 +++--
 2 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/doc/umr.1 b/doc/umr.1
index 1e585fa..137ff1e 100644
--- a/doc/umr.1
+++ b/doc/umr.1
@@ -216,17 +216,30 @@ Disassemble 'size' bytes (in hex) from a given address 
(in hex).  The size can b
 specified as zero to have umr try and compute the shader size.
 
 .SH Ring and PM4 Decoding
-.IP "--ring, -R (from:to)"
-Read the contents of a ring named by the string without the
-.B amdgpu_ring_
-prefix.  By default it will read and display the entire ring.  A
-starting and ending address can be specified in decimal or a '.' can
-be used to indicate relative to the current
+.IP "--ring, -R [range]"
+Read the contents of the ring named by the string
+.B amdgpu_ring_,
+i.e. without the
+.B amdgpu_ring
+prefix. By default it reads and prints the entire ring.  A
+range is optional and has the format '[start:end]'. The
+starting and ending address are non-negative integers or
+the '.' (dot) symbol, which indicates the
+.B rptr
+when on the left side and
 .B wptr
-pointer.  For example, "-R gfx" would read the entire gfx ring,
-"-R gfx[0:16]" would display the contents from 0 to 16 inclusively, and
-"-R gfx[.]" or "-R gfx[.:.]" would display the last 32 words relative
-to rptr.
+when on the right side of the range.
+For instance,
+"-R gfx" prints the entire gfx ring, "-R gfx[0:16]" prints
+the contents from 0 to 16 inclusively, and "-R gfx[.]" or
+"-R gfx[.:.]" prints the range [rptr,wptr]. When one of
+the range limits is a number while the other is the dot, '.',
+then the number indicates the relative range before or after the
+corresponding ring pointer. For instance, "-R sdma0[16:.]"
+prints [wptr-16, wptr] words of the SDMA0 ring, and
+"-R sdma1[.:32]" prints [rptr, rptr+32] double-words of the
+SDMA1 ring. The contents of the ring is always interpreted,
+if it can be interpreted.
 .IP "--dump-ib, -di [vmid@]address length [pm]"
 Dump an IB packet at an address with an optional VMID.  The length is specified
 in bytes.  The type of decoder  is optional and defaults to PM4 packets.
diff --git a/src/app/ring_read.c b/src/app/ring_read.c
index ef0c711..b995188 100644
--- a/src/app/ring_read.c
+++ b/src/app/ring_read.c
@@ -28,7 +28,7 @@
 void umr_read_ring(struct umr_asic *asic, char *ringpath)
 {
char ringname[32], from[32], to[32];
-   int use_decoder, enable_decoder, gprs;
+   int  enable_decoder, gprs;
uint32_t wptr, rptr, drv_wptr, ringsize, start, end, value,
 *ring_data;
struct umr_ring_decoder decoder, *pdecoder, *ppdecoder;
@@ -73,33 +73,46 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath)
drv_wptr = ring_data[2]<<2;
 
/* default to reading entire ring */
-   use_decoder = 0;
if (!from[0]) {
start = 0;
end   = ringsize-4;
} else {
-   if (from[0] == '.' || !to[0] || to[0] == '.') {
-   /* start from 32 words prior to rptr up to wptr */
-   end = wptr;
-   if (rptr < (31*4)) {
-   start = rptr - 31*4;
-   start += ringsize;
+   if (from[0] == '.') {
+   if (to[0] == 0 || to[0] == '.') {
+   /* Notation: [.] or [.:.], meaning
+* [rptr, wptr].
+*/
+   start = rptr;
+   end = wptr;
} else {
-   start = rptr - 31*4;
+   /* Notation: [.:k], k >=0, meaning
+* [rptr, rtpr+k] double-words.
+*/
+   start = rptr;
+   sscanf(to, "%"SCNu32, &end);
+   end *= 4;
+   end = (start + end + ringsize) % rin

Re: [PATCH] Ring range argument improvements (v3)

2019-10-24 Thread StDenis, Tom
Thanks, pushed out.


Cheers,

Tom

On 2019-10-24 2:27 p.m., Tuikov, Luben wrote:
> The valid contents of rings is invariably the
> range [rptr, wptr). Augment the ring range to
> interpret the '.' ("here") notation to mean rptr
> or wptr when given on the left or right of the
> range limits. This augments the range notation as
> follows:
>
> 1) No range given, print the whole ring.
>
> 2) [.] or [.:.], print [rptr, wptr],
>
> 3) [.:k], k >= 0, print [rptr, rptr + k], this is
> a range relative to the left limit, rptr, the
> consumer pointer.
>
> 4) [k:.] or [k], k >= 0, print [wptr - k, wptr], this is
> a range relative to the right limit, wptr, the
> producer pointer.
>
> 5) [k:r], both greater than 0, print [k,r] of the
> named ring. This is an absolute range limit
> notation.
>
> In any case, the ring contents is interpreted, if
> the ring contents can be interpreted.
>
> v2: Fix spelling mistake in the commit message:
> "then" --> "than".
> v3: Fix decoding.
>
> Signed-off-by: Luben Tuikov 
> ---
>   doc/umr.1   | 33 +
>   src/app/ring_read.c | 59 +++--
>   2 files changed, 59 insertions(+), 33 deletions(-)
>
> diff --git a/doc/umr.1 b/doc/umr.1
> index 1e585fa..137ff1e 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -216,17 +216,30 @@ Disassemble 'size' bytes (in hex) from a given address 
> (in hex).  The size can b
>   specified as zero to have umr try and compute the shader size.
>   
>   .SH Ring and PM4 Decoding
> -.IP "--ring, -R (from:to)"
> -Read the contents of a ring named by the string without the
> -.B amdgpu_ring_
> -prefix.  By default it will read and display the entire ring.  A
> -starting and ending address can be specified in decimal or a '.' can
> -be used to indicate relative to the current
> +.IP "--ring, -R [range]"
> +Read the contents of the ring named by the string
> +.B amdgpu_ring_,
> +i.e. without the
> +.B amdgpu_ring
> +prefix. By default it reads and prints the entire ring.  A
> +range is optional and has the format '[start:end]'. The
> +starting and ending address are non-negative integers or
> +the '.' (dot) symbol, which indicates the
> +.B rptr
> +when on the left side and
>   .B wptr
> -pointer.  For example, "-R gfx" would read the entire gfx ring,
> -"-R gfx[0:16]" would display the contents from 0 to 16 inclusively, and
> -"-R gfx[.]" or "-R gfx[.:.]" would display the last 32 words relative
> -to rptr.
> +when on the right side of the range.
> +For instance,
> +"-R gfx" prints the entire gfx ring, "-R gfx[0:16]" prints
> +the contents from 0 to 16 inclusively, and "-R gfx[.]" or
> +"-R gfx[.:.]" prints the range [rptr,wptr]. When one of
> +the range limits is a number while the other is the dot, '.',
> +then the number indicates the relative range before or after the
> +corresponding ring pointer. For instance, "-R sdma0[16:.]"
> +prints [wptr-16, wptr] words of the SDMA0 ring, and
> +"-R sdma1[.:32]" prints [rptr, rptr+32] double-words of the
> +SDMA1 ring. The contents of the ring is always interpreted,
> +if it can be interpreted.
>   .IP "--dump-ib, -di [vmid@]address length [pm]"
>   Dump an IB packet at an address with an optional VMID.  The length is 
> specified
>   in bytes.  The type of decoder  is optional and defaults to PM4 packets.
> diff --git a/src/app/ring_read.c b/src/app/ring_read.c
> index ef0c711..b995188 100644
> --- a/src/app/ring_read.c
> +++ b/src/app/ring_read.c
> @@ -28,7 +28,7 @@
>   void umr_read_ring(struct umr_asic *asic, char *ringpath)
>   {
>   char ringname[32], from[32], to[32];
> - int use_decoder, enable_decoder, gprs;
> + int  enable_decoder, gprs;
>   uint32_t wptr, rptr, drv_wptr, ringsize, start, end, value,
>*ring_data;
>   struct umr_ring_decoder decoder, *pdecoder, *ppdecoder;
> @@ -73,33 +73,46 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath)
>   drv_wptr = ring_data[2]<<2;
>   
>   /* default to reading entire ring */
> - use_decoder = 0;
>   if (!from[0]) {
>   start = 0;
>   end   = ringsize-4;
>   } else {
> - if (from[0] == '.' || !to[0] || to[0] == '.') {
> - /* start from 32 words prior to rptr up to wptr */
> - end = wptr;
> - if (rptr < (31*4)) {
> - start = rptr - 31*4;
> - start += ringsize;
> + if (from[0] == '.') {
> + if (to[0] == 0 || to[0] == '.') {
> + /* Notation: [.] or [.:.], meaning
> +  * [rptr, wptr].
> +  */
> + start = rptr;
> + end = wptr;
>   } else {
> - start = rptr - 31*4;
> + /* Notation: [.:k], k >=0, meaning
> +  * [rptr, rtpr

[PATCH] drm/amdkfd: bug fix for out of bounds mem on gpu cache filling info

2019-10-24 Thread Sierra Guiza, Alejandro (Alex)
The bitmap in the cu_info structure is defined as a 4x4 array. In
Arcturus, this matrix is initialized as 4x2, based on the 8 shaders.
In the GPU cache filling initialization, the bitmap matrix was
accessed as 8x1 instead of 4x2, causing an out-of-bounds memory
access error.
Due to this, the number of GPU cache entries was inconsistent.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 0c327e0fc0f7..de9f68d5c312 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -710,7 +710,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info,
cu_info,
mem_available,
-   cu_info->cu_bitmap[i][j],
+   cu_info->cu_bitmap[i % 4][j + i 
/ 4],
ct,
cu_processor_id,
k);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: bug fix for out of bounds mem on gpu cache filling info

2019-10-24 Thread Kuehling, Felix
On 2019-10-24 14:46, Sierra Guiza, Alejandro (Alex) wrote:
> The bitmap in cu_info structure is defined as a 4x4 size array. In
> Acturus, this matrix is initialized as a 4x2. Based on the 8 shaders.
> In the gpu cache filling initialization, the access to the bitmap matrix
> was done as an 8x1 instead of 4x2. Causing an out of bounds memory
> access error.
> Due to this, the number of GPU cache entries was inconsistent.
>
> Signed-off-by: Alex Sierra 

Reviewed-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 0c327e0fc0f7..de9f68d5c312 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -710,7 +710,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
>   pcache_info,
>   cu_info,
>   mem_available,
> - cu_info->cu_bitmap[i][j],
> + cu_info->cu_bitmap[i % 4][j + i 
> / 4],
>   ct,
>   cu_processor_id,
>   k);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/sched: Set error to s_fence if HW job submission failed.

2019-10-24 Thread Andrey Grodzovsky
Problem:
When run_job fails and the HW fence returned is NULL, we still signal
the s_fence to avoid hangs, but the user has no way of knowing whether
the actual HW job was run and finished.

Fix:
Allow .run_job implementations to return ERR_PTR in the returned fence
pointer, and then set this error on the s_fence->finished fence so that
whoever waits on this fence can inspect the signaled fence for an error.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/scheduler/sched_main.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 9a0ee74..f39b97e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
struct drm_sched_job *s_job, *tmp;
uint64_t guilty_context;
bool found_guilty = false;
+   struct dma_fence *fence;
 
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
dma_fence_put(s_job->s_fence->parent);
-   s_job->s_fence->parent = sched->ops->run_job(s_job);
+   fence = sched->ops->run_job(s_job);
+
+   if (IS_ERR_OR_NULL(fence)) {
+   s_job->s_fence->parent = NULL;
+   dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
+   } else {
+   s_job->s_fence->parent = fence;
+   }
+
+
}
 }
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
fence = sched->ops->run_job(sched_job);
drm_sched_fence_scheduled(s_fence);
 
-   if (fence) {
+   if (!IS_ERR_OR_NULL(fence)) {
s_fence->parent = dma_fence_get(fence);
r = dma_fence_add_callback(fence, &sched_job->cb,
   drm_sched_process_job);
@@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
DRM_ERROR("fence add callback failed (%d)\n",
  r);
dma_fence_put(fence);
-   } else
+   } else {
+
+   dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
drm_sched_process_job(NULL, &sched_job->cb);
+   }
 
wake_up(&sched->job_scheduled);
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: If amdgpu_ib_schedule fails return back the error.

2019-10-24 Thread Andrey Grodzovsky
Use ERR_PTR to return the error that happened during amdgpu_ib_schedule.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e1bad99..9769bb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -243,6 +243,8 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
 
amdgpu_job_free_resources(job);
+
+   fence = r ? ERR_PTR(r) : fence;
return fence;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH v5] drm/amd/display: Add MST atomic routines

2019-10-24 Thread mikita.lipski
From: Mikita Lipski 

- Adding encoder atomic check to find vcpi slots for a connector
- Using DRM helper functions to calculate PBN
- Adding connector atomic check to release vcpi slots if connector
loses CRTC
- Calculate  PBN and VCPI slots only once during atomic
check and store them on crtc_state to eliminate
redundant calculation
- Call drm_dp_mst_atomic_check to verify validity of MST topology
during state atomic check

v2: squashed previous 3 separate patches, removed DSC PBN calculation,
and added PBN and VCPI slots properties to amdgpu connector

v3:
- moved vcpi_slots and pbn properties to dm_crtc_state and dc_stream_state
- updates stream's vcpi_slots and pbn on commit
- separated patch from the DSC MST series

v4:
- set vcpi_slots and pbn properties to dm_connector_state
- copy properties from connector state on to crtc state

v5:
- keep the pbn and vcpi values only on connector state
- added a void pointer to the stream state instead of two ints,
because dc_stream_state is OS agnostic. Pointer points to the
current dm_connector_state.

Cc: Jun Lei 
Cc: Jerry Zuo 
Cc: Harry Wentland 
Cc: Nicholas Kazlauskas 
Cc: Lyude Paul 
Signed-off-by: Mikita Lipski 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 46 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 44 ++
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 32 +
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  1 +
 5 files changed, 84 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 48f5b43e2698..1d8d7aaba197 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3747,6 +3747,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
}
 
stream->dm_stream_context = aconnector;
+   stream->dm_stream_state = dm_state;
 
stream->timing.flags.LTE_340MCSC_SCRAMBLE =
drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
@@ -4180,7 +4181,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector 
*connector)
state->underscan_hborder = 0;
state->underscan_vborder = 0;
state->base.max_requested_bpc = 8;
-
+   state->vcpi_slots = 0;
+   state->pbn = 0;
if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
state->abm_level = amdgpu_dm_abm_level;
 
@@ -4209,7 +4211,8 @@ amdgpu_dm_connector_atomic_duplicate_state(struct 
drm_connector *connector)
new_state->underscan_enable = state->underscan_enable;
new_state->underscan_hborder = state->underscan_hborder;
new_state->underscan_vborder = state->underscan_vborder;
-
+   new_state->vcpi_slots = state->vcpi_slots;
+   new_state->pbn = state->pbn;
return &new_state->base;
 }
 
@@ -4610,6 +4613,37 @@ static int dm_encoder_helper_atomic_check(struct 
drm_encoder *encoder,
  struct drm_crtc_state *crtc_state,
  struct drm_connector_state 
*conn_state)
 {
+   struct drm_atomic_state *state = crtc_state->state;
+   struct drm_connector *connector = conn_state->connector;
+   struct amdgpu_dm_connector *aconnector = 
to_amdgpu_dm_connector(connector);
+   struct dm_connector_state *dm_new_connector_state = 
to_dm_connector_state(conn_state);
+   const struct drm_display_mode *adjusted_mode = 
&crtc_state->adjusted_mode;
+   struct drm_dp_mst_topology_mgr *mst_mgr;
+   struct drm_dp_mst_port *mst_port;
+   int clock, bpp = 0;
+
+   if (!aconnector->port || !aconnector->dc_sink)
+   return 0;
+
+   mst_port = aconnector->port;
+   mst_mgr = &aconnector->mst_port->mst_mgr;
+
+   if (!crtc_state->connectors_changed && !crtc_state->mode_changed)
+   return 0;
+
+   if (!state->duplicated) {
+   bpp = (uint8_t)connector->display_info.bpc * 3;
+   clock = adjusted_mode->clock;
+   dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp);
+   }
+   dm_new_connector_state->vcpi_slots = 
drm_dp_atomic_find_vcpi_slots(state,
+  mst_mgr,
+  mst_port,
+  
dm_new_connector_state->pbn);
+   if (dm_new_connector_state->vcpi_slots < 0) {
+   DRM_DEBUG_ATOMIC("failed finding vcpi slots: %d\n", 
dm_new_connector_state->vcpi_slots);
+   return dm_new_connector_state->vcpi_slots;
+   }
return 0;
 }
 
@@ -6598,6 +6632,8 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state)
hdr_changed =
   

[PATCH] drm/amdkfd: Delete duplicated queue bit map reservation

2019-10-24 Thread Zhao, Yong
The KIQ is on the second MEC and its reservation is covered in the
latter logic, so no need to reserve its bit twice.

Change-Id: Ieee390953a60c7d43de5a9aec38803f1f583a4a9
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8c531793fe17..d3da9dde4ee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -130,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
  adev->gfx.mec.queue_bitmap,
  KGD_MAX_QUEUES);
 
-   /* remove the KIQ bit as well */
-   if (adev->gfx.kiq.ring.sched.ready)
-   clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
- adev->gfx.kiq.ring.me 
- 1,
- 
adev->gfx.kiq.ring.pipe,
- 
adev->gfx.kiq.ring.queue),
- gpu_resources.queue_bitmap);
-
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 * nbits is not compile time constant
 */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-24 Thread Tuikov, Luben
The GRBM interface is now capable of bursting
1-cycle op per register, a WRITE followed by
another WRITE, or a WRITE followed by a READ--much
faster than the previous multi-cycle per
completed-transaction interface. This causes a
problem, whereby status registers requiring a
read/write by hardware have a 1-cycle delay, due
to the register update having to go through the GRBM
interface.

This patch adds this delay.

A one cycle read op is added after updating the
invalidate request and before reading the
invalidate-ACK status.

See also commit
534991731cb5fa94b5519957646cf849ca10d17d.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ac43b1af69e3..0042868dbd53 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
@@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9fe95e7693d5..9a7a717208de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_gfx = {
5 +  /* COND_EXEC */
7 +  /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 +  /* FENCE for VM_FLUSH */
20 + /* GDS switch */
@@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6e1b25bd1fe7..100d526e9a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
 
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+   /* Insert a dummy read to delay one cycle before the ACK
+* inquiry.
+*/
+   if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
+   ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
+   ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+   amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_req + eng, 0, 0);
+
/* wait for the invalidate to complete */
amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
  1 << vmid, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9f2a893871ec..8f3097e45299 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
  upper_32_bits(pd_addr));
 
+   /* Insert a dummy read to delay one cycle before the ACK
+* inquiry.
+*/
+   if (ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
+   ring->funcs->type == AMDGPU_RI

Re: [PATCH] Cleanup: replace prefered with preferred

2019-10-24 Thread kbuild test robot
Hi Mark,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.4-rc4 next-20191024]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Mark-Salyzyn/Cleanup-replace-prefered-with-preferred/20191024-084105
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
13b86bc4cd648eae69fdcf3d04b2750c76350053
config: x86_64-randconfig-a004-201942 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.2-10+deb8u1) 4.9.2
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from include/linux/sysctl.h:30:0,
from include/linux/key.h:17,
from include/linux/cred.h:13,
from include/linux/seq_file.h:12,
from include/linux/cgroup.h:18,
from include/linux/memcontrol.h:13,
from include/linux/swap.h:9,
from include/linux/suspend.h:5,
from arch/x86/kernel/asm-offsets.c:13:
>> include/uapi/linux/sysctl.h:561:29: error: expected ',' or '}' before 
>> '__attribute__'
 NET_IPV6_TEMP_PREFERED_LFT __attribute__((deprecated)) = /* NOTYPO */
^
   make[2]: *** [arch/x86/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [sub-make] Error 2
   29 real  5 user  4 sys  35.37% cpu   make prepare

vim +561 include/uapi/linux/sysctl.h

   546  
   547  enum {
   548  NET_IPV6_FORWARDING=1,
   549  NET_IPV6_HOP_LIMIT=2,
   550  NET_IPV6_MTU=3,
   551  NET_IPV6_ACCEPT_RA=4,
   552  NET_IPV6_ACCEPT_REDIRECTS=5,
   553  NET_IPV6_AUTOCONF=6,
   554  NET_IPV6_DAD_TRANSMITS=7,
   555  NET_IPV6_RTR_SOLICITS=8,
   556  NET_IPV6_RTR_SOLICIT_INTERVAL=9,
   557  NET_IPV6_RTR_SOLICIT_DELAY=10,
   558  NET_IPV6_USE_TEMPADDR=11,
   559  NET_IPV6_TEMP_VALID_LFT=12,
   560  NET_IPV6_TEMP_PREFERRED_LFT = 13,
 > 561  NET_IPV6_TEMP_PREFERED_LFT __attribute__((deprecated)) = /* 
 > NOTYPO */
   562  NET_IPV6_TEMP_PREFERRED_LFT,
   563  NET_IPV6_REGEN_MAX_RETRY=14,
   564  NET_IPV6_MAX_DESYNC_FACTOR=15,
   565  NET_IPV6_MAX_ADDRESSES=16,
   566  NET_IPV6_FORCE_MLD_VERSION=17,
   567  NET_IPV6_ACCEPT_RA_DEFRTR=18,
   568  NET_IPV6_ACCEPT_RA_PINFO=19,
   569  NET_IPV6_ACCEPT_RA_RTR_PREF=20,
   570  NET_IPV6_RTR_PROBE_INTERVAL=21,
   571  NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
   572  NET_IPV6_PROXY_NDP=23,
   573  NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
   574  NET_IPV6_ACCEPT_RA_FROM_LOCAL=26,
   575  NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27,
   576  __NET_IPV6_MAX
   577  };
   578  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: simplify padding calculations

2019-10-24 Thread Tuikov, Luben
Simplify padding calculations.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..1ea9e18d6f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
 
/* IB packet must end on a 8 DW boundary */
-   cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
8);
+   cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib)
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..d340f179401a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..5c3c310188b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 45bd538ba97e..7c71c88e38a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
/* IB packet must end on a 8 DW boundary */
-   sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
% 8);
+   sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
 
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
*ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
 
-   pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+   pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 0c41b4fdc58b..67ede9e4df01 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gp

RE: [PATCH] drm/amd/powerplay: modify the parameters of SMU_MSG_PowerUpVcn to 0

2019-10-24 Thread Quan, Evan
Acked-by: Evan Quan 

-Original Message-
From: amd-gfx  On Behalf Of Liu, Aaron
Sent: Thursday, October 24, 2019 5:13 PM
To: Gong, Curry ; amd-gfx@lists.freedesktop.org
Cc: Gong, Curry 
Subject: RE: [PATCH] drm/amd/powerplay: modify the parameters of 
SMU_MSG_PowerUpVcn to 0

Reviewed-by: Aaron Liu 

BR,
Aaron Liu

> -Original Message-
> From: amd-gfx  On Behalf Of 
> chen gong
> Sent: Thursday, October 24, 2019 4:59 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Gong, Curry 
> Subject: [PATCH] drm/amd/powerplay: modify the parameters of 
> SMU_MSG_PowerUpVcn to 0
> 
> The parameter that SMU_MSG_PowerUpVcn needs is 0, not 1
> 
> Signed-off-by: chen gong 
> ---
>  drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> index 45c5f54..4a97519 100644
> --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
> @@ -282,7 +282,7 @@ static int renoir_dpm_set_uvd_enable(struct 
> smu_context *smu, bool enable)
>   if (enable) {
>   /* vcn dpm on is a prerequisite for vcn power gate messages */
>   if (smu_feature_is_enabled(smu,
> SMU_FEATURE_VCN_PG_BIT)) {
> - ret = smu_send_smc_msg_with_param(smu,
> SMU_MSG_PowerUpVcn, 1);
> + ret = smu_send_smc_msg_with_param(smu,
> SMU_MSG_PowerUpVcn, 0);
>   if (ret)
>   return ret;
>   }
> --
> 2.7.4
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-24 Thread Zhu, Changfeng
Inline.


-Original Message-
From: amd-gfx  On Behalf Of Tuikov, Luben
Sent: Friday, October 25, 2019 5:17 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Pelloux-prayer, Pierre-eric 
; Tuikov, Luben ; 
Koenig, Christian 
Subject: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

The GRBM interface is now capable of bursting 1-cycle op per register, a WRITE 
followed by another WRITE, or a WRITE followed by a READ--much faster than 
previous multi-cycle per completed-transaction interface. This causes a problem, 
whereby status registers requiring a read/write by hardware, have a 1-cycle 
delay, due to the register update having to go through GRBM interface.

This patch adds this delay.

A one cycle read op is added after updating the invalidate request and before 
reading the invalidate-ACK status.

See also commit
534991731cb5fa94b5519957646cf849ca10d17d.

Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--  
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--  
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +  
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8   
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ac43b1af69e3..0042868dbd53 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
@@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9fe95e7693d5..9a7a717208de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_gfx = {
5 +  /* COND_EXEC */
7 +  /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* VM_FLUSH */
8 +  /* FENCE for VM_FLUSH */
20 + /* GDS switch */
@@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6e1b25bd1fe7..100d526e9a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
 
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+   /* Insert a dummy read to delay one cycle before the ACK
+* inquiry.
+*/
+   if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
+   ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
+   ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+   amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_req + eng, 0, 0);
+
/* wait for the invalidate to complete */
amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
  1 << vmid, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9f2a893871ec..8f3097e45299 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,

Re: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

2019-10-24 Thread Koenig, Christian
Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
> The GRBM interface is now capable of bursting
> 1-cycle op per register, a WRITE followed by
> another WRITE, or a WRITE followed by a READ--much
> faster than previous multi-cycle per
> completed-transaction interface. This causes a
> problem, whereby status registers requiring a
> read/write by hardware, have a 1-cycle delay, due
> to the register update having to go through GRBM
> interface.
>
> This patch adds this delay.
>
> A one cycle read op is added after updating the
> invalidate request and before reading the
> invalidate-ACK status.

Please completely drop all changes for GFX9 since this patch will most 
likely break SRIOV.

In addition to that, please apply the workaround only to SDMA, since the CP 
driven engines should handle that in firmware.

Regards,
Christian.

>
> See also commit
> 534991731cb5fa94b5519957646cf849ca10d17d.
>
> Signed-off-by: Luben Tuikov 
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
>   5 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ac43b1af69e3..0042868dbd53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>   5 + /* COND_EXEC */
>   7 + /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 + /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v10_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v10_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 9fe95e7693d5..9a7a717208de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_gfx = {
>   5 +  /* COND_EXEC */
>   7 +  /* PIPELINE_SYNC */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* VM_FLUSH */
>   8 +  /* FENCE for VM_FLUSH */
>   20 + /* GDS switch */
> @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_compute = {
>   5 + /* hdp invalidate */
>   7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>   2 + /* gfx_v9_0_ring_emit_vm_flush */
>   8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
> fence */
>   .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 6e1b25bd1fe7..100d526e9a42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
> amdgpu_ring *ring,
>   
>   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>   
> + /* Insert a dummy read to delay one cycle before the ACK
> +  * inquiry.
> +  */
> + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
> + ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
> + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> + amdgpu_ring_emit_reg_wait(ring,
> +   hub->vm_inv_eng0_req + eng, 0, 0);
> +
>   /* wait for the invalidate to complete */
>   amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> 1 << vmid, 1 << vmid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9f2a893871ec..8f3097e45299 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -495,

Re: [PATCH] drm/amdgpu: simplify padding calculations

2019-10-24 Thread Koenig, Christian
Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
> Simplify padding calculations.
>
> Signed-off-by: Luben Tuikov 
> ---
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +-
>   5 files changed, 17 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c45304f1047c..1ea9e18d6f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   u32 extra_bits = vmid & 0xf;
>   
>   /* IB packet must end on a 8 DW boundary */
> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
> 8);
> + cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
> extra_bits));
>   amdgpu_ring_write(ring, ib->gpu_addr & 0xffe0); /* base must be 32 
> byte aligned */
> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib)
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index a10175838013..d340f179401a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 5f4e2c616241..5c3c310188b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 45bd538ba97e..7c71c88e38a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   /* IB packet must end on a 8 DW boundary */
> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
> % 8);
> + sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>   
>   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
> *ring, struct amdgpu_ib *ib
>   u32 pad_count;
>   int i;
>   
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
>   for (i = 0; i < pad_count; i++)
>   if (sdma && sdma->burst_nop && (i == 0))
>   ib->ptr[ib->length_dw++] =
>