[PATCH 3/6] drm/amdgpu: add get_clockgating callback for soc15

2017-03-23 Thread Huang Rui
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/soc15.c | 34 ++
 2 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 743a852..fef89c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -55,7 +55,10 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"},
{AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"},
+   {AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"},
+   {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"},
{AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"},
+   {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"},
{0, NULL},
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index e37c1ff..dd70984 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -782,6 +782,39 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
 }
 
+static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int data;
+
+   nbio_v6_1_get_clockgating_state(adev, flags);
+
+   /* AMD_CG_SUPPORT_HDP_LS */
+   data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
+   if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
+   *flags |= AMD_CG_SUPPORT_HDP_LS;
+
+   /* AMD_CG_SUPPORT_DRM_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_CGTT_DRM_CLK_CTRL0));
+   if (!(data & MP0_SMN_CGTT_DRM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+   *flags |= AMD_CG_SUPPORT_DRM_MGCG;
+
+   /* AMD_CG_SUPPORT_DRM_LS */
+   data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_DRM_LIGHT_SLEEP_CTRL));
+   if (data & MP0_SMN_DRM_LIGHT_SLEEP_CTRL__MEM_LIGHT_SLEEP_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_DRM_LS;
+
+   /* AMD_CG_SUPPORT_ROM_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0));
+   if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+   *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+
+   /* AMD_CG_SUPPORT_DF_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
+   if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY)
+   *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
 static int soc15_common_set_powergating_state(void *handle,
enum amd_powergating_state state)
 {
@@ -804,4 +837,5 @@ const struct amd_ip_funcs soc15_common_ip_funcs = {
.soft_reset = soc15_common_soft_reset,
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
+   .get_clockgating_state = soc15_common_get_clockgating_state,
 };
-- 
2.7.4



[PATCH 6/6] drm/amdgpu: fix to remove HDP MGCG on soc15

2017-03-23 Thread Huang Rui
SOC15 doesn't enable HDP MGCG yet.

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index dd70984..a7a0c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -561,7 +561,6 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS |
-   AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_DRM_MGCG |
AMD_CG_SUPPORT_DRM_LS |
-- 
2.7.4



[PATCH 5/6] drm/amdgpu: add get_clockgating callback for mmhub v1

2017-03-23 Thread Huang Rui
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index b1e0e6b..68e5f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -552,6 +552,22 @@ static int mmhub_v1_0_set_clockgating_state(void *handle,
return 0;
 }
 
+static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int data;
+
+   /* AMD_CG_SUPPORT_MC_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
+   if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+   *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+   /* AMD_CG_SUPPORT_MC_LS */
+   data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
+   if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+   *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
 static int mmhub_v1_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
 {
@@ -573,6 +589,7 @@ const struct amd_ip_funcs mmhub_v1_0_ip_funcs = {
.soft_reset = mmhub_v1_0_soft_reset,
.set_clockgating_state = mmhub_v1_0_set_clockgating_state,
.set_powergating_state = mmhub_v1_0_set_powergating_state,
+   .get_clockgating_state = mmhub_v1_0_get_clockgating_state,
 };
 
 const struct amdgpu_ip_block_version mmhub_v1_0_ip_block =
-- 
2.7.4



[PATCH 0/6] drm/amdgpu: add get clockgating functions for new asic

2017-03-23 Thread Huang Rui
Hi all,

This patch set adds the get_clockgating_state callbacks; with them in place,
the dynamic clockgating status can be checked through the debugfs pm
interface.
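
As a rough illustration of the debugfs side (not the actual amdgpu_pm.c
implementation; the helper name and the loop over IP blocks are assumptions,
and the cg_flag_name members are assumed to be named flag/name):

/* Sketch only: gather the CG flags reported by every IP block that
 * implements the new callback, then print them via the clocks[] table
 * extended in this series.  "print_clockgating_state" is a made-up helper. */
static void print_clockgating_state(struct seq_file *m,
                                    struct amdgpu_device *adev)
{
        u32 flags = 0;
        int i;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                const struct amd_ip_funcs *funcs =
                        adev->ip_blocks[i].version->funcs;

                if (funcs->get_clockgating_state)
                        funcs->get_clockgating_state((void *)adev, &flags);
        }

        /* walk the name table until the {0, NULL} sentinel */
        for (i = 0; clocks[i].flag; i++)
                seq_printf(m, "\t%s: %s\n", clocks[i].name,
                           (flags & clocks[i].flag) ? "On" : "Off");
}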

Thanks,
Rui

Huang Rui (6):
  drm/amdgpu: add get_clockgating callback for gfx v9
  drm/amdgpu: add get_clockgating callback for nbio v6.1
  drm/amdgpu: add get_clockgating callback for soc15
  drm/amdgpu: add get_clockgating for sdma v4
  drm/amdgpu: add get_clockgating callback for mmhub v1
  drm/amdgpu: fix to remove HDP MGCG on soc15

 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c  |  6 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 43 +
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 17 +
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c  | 15 
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 17 +
 drivers/gpu/drm/amd/amdgpu/soc15.c  | 35 ++-
 7 files changed, 133 insertions(+), 1 deletion(-)

-- 
2.7.4



[PATCH 4/6] drm/amdgpu: add get_clockgating for sdma v4

2017-03-23 Thread Huang Rui
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7347326..df4b1d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1389,6 +1389,22 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
 }
 
+static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int data;
+
+   /* AMD_CG_SUPPORT_SDMA_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
+   if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
+   *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+   /* AMD_CG_SUPPORT_SDMA_LS */
+   data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+   if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+   *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
 const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.name = "sdma_v4_0",
.early_init = sdma_v4_0_early_init,
@@ -1404,6 +1420,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.soft_reset = sdma_v4_0_soft_reset,
.set_clockgating_state = sdma_v4_0_set_clockgating_state,
.set_powergating_state = sdma_v4_0_set_powergating_state,
+   .get_clockgating_state = sdma_v4_0_get_clockgating_state,
 };
 
 static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
-- 
2.7.4



[PATCH 2/6] drm/amdgpu: add get_clockgating callback for nbio v6.1

2017-03-23 Thread Huang Rui
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 15 +++
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h |  1 +
 3 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 2c170f1..743a852 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -49,6 +49,7 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"},
{AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"},
+   {AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"},
{AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"},
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index f517e9a..c0945e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -206,6 +206,21 @@ void nbio_v6_1_update_medium_grain_light_sleep(struct 
amdgpu_device *adev,
WREG32_PCIE(smnPCIE_CNTL2, data);
 }
 
+void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
+{
+   int data;
+
+   /* AMD_CG_SUPPORT_BIF_MGCG */
+   data = RREG32_PCIE(smnCPM_CONTROL);
+   if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+   *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+   /* AMD_CG_SUPPORT_BIF_LS */
+   data = RREG32_PCIE(smnPCIE_CNTL2);
+   if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
 struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
 struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h 
b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
index a778d1c..a7e6f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -48,5 +48,6 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev);
 u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev);
void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable);
void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable);
+void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags);
 
 #endif
-- 
2.7.4



[PATCH 1/6] drm/amdgpu: add get_clockgating callback for gfx v9

2017-03-23 Thread Huang Rui
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 43 ++
 2 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 28a1e04..2c170f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -43,6 +43,8 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"},
{AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"},
{AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"},
+   {AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"},
+   {AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"},
{AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"},
{AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"},
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 611d68f..0a745ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2374,6 +2374,48 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
 }
 
+static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int data;
+
+   if (amdgpu_sriov_vf(adev))
+   *flags = 0;
+
+   /* AMD_CG_SUPPORT_GFX_MGCG */
+   data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+   if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+   *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+   /* AMD_CG_SUPPORT_GFX_CGCG */
+   data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+   if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+   /* AMD_CG_SUPPORT_GFX_CGLS */
+   if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+   /* AMD_CG_SUPPORT_GFX_RLC_LS */
+   data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+   if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+   /* AMD_CG_SUPPORT_GFX_CP_LS */
+   data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+   if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+   /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+   data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+   if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+   /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+   if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+   *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
 {
return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
@@ -2865,6 +2907,7 @@ const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.soft_reset = gfx_v9_0_soft_reset,
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
+   .get_clockgating_state = gfx_v9_0_get_clockgating_state,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
-- 
2.7.4



Re: Question about page table updates at BO destroy

2017-03-23 Thread Zhang, Jerry (Junwei)

On 03/23/2017 09:07 PM, Nicolai Hähnle wrote:

Hi Jerry,

On 23.03.2017 03:26, Zhang, Jerry (Junwei) wrote:

On 03/22/2017 11:06 PM, Nicolai Hähnle wrote:

Hi all,

there's a bit of a puzzle where I'm wondering whether there's a subtle bug
in the amdgpu kernel module.

Basically, the concern is that a buggy user space driver might trigger a
sequence like this:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.


As I understand it, user space should call unmap when freeing a BO.
In this case, amdgpu_gem_va_update_vm() will be called to clear the PTEs
related to the BO.
Right?

Or are you imagining a scenario in which there is no unmap at all?


I'm thinking of the scenario without an unmap, i.e. broken / malicious user
space. I haven't looked into the unmap case yet, but I will. I have a WIP
patch for this and will give it a proper test drive later.


If so, it will happen.
I have reviewed them all.

Jerry



Cheers,
Nicolai




Jerry


3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is now
already in use by somebody else, since there has been no update to the
page tables to reflect the freed BO.

Obviously there's a user space bug in step 1, but the kernel must still
prevent the conflicting memory accesses, and I don't see where it does.

amdgpu_gem_object_close takes a reservation of the BO and the page
directory, but then simply backs off that reservation rather than adding
a fence, which I suspect is necessary.

I believe that whenever we remove a BO from a VM, we must unconditionally
add the most recent page directory fence(?) to the BO. Does that sound right?

Cheers,
Nicolai
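
For illustration, one possible shape of the fix being suggested here, as a
hedged sketch only (the v2 patches elsewhere in this digest take a slightly
different route and fence the BO with the fence returned by
amdgpu_vm_clear_freed):

/* Sketch: in amdgpu_gem_object_close(), after the BO and page directory have
 * been reserved and the bo_va removed, attach the most recent page-directory
 * update fence to the BO before backing off the reservation, so the memory
 * cannot be handed to someone else while an already-submitted CS may still
 * touch it. */
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va && --bo_va->ref_count == 0) {
        amdgpu_vm_bo_rmv(adev, bo_va);

        if (vm->page_directory_fence)
                amdgpu_bo_fence(bo, vm->page_directory_fence, true);
}
ttm_eu_backoff_reservation(&ticket, &list);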







Re: [PATCH v2 2/2] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Zhang, Jerry (Junwei)

On 03/24/2017 11:42 AM, Zhang, Jerry (Junwei) wrote:

On 03/24/2017 03:27 AM, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

v2: use amdgpu_bo_fence directly

Signed-off-by: Nicolai Hähnle 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 
  1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4a53c43..8b0f5f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
  struct amdgpu_vm *vm = &fpriv->vm;

  struct amdgpu_bo_list_entry vm_pd;
  struct list_head list, duplicates;
  struct ttm_validate_buffer tv;
  struct ww_acquire_ctx ticket;
  struct amdgpu_bo_va *bo_va;
+struct fence *fence = NULL;
  int r;

  INIT_LIST_HEAD(&list);
  INIT_LIST_HEAD(&duplicates);

  tv.bo = &bo->tbo;
  tv.shared = true;
  list_add(&tv.head, &list);

  amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,20 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
  r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
  if (r) {
  dev_err(adev->dev, "leaking bo va because "
  "we fail to reserve bo (%d)\n", r);
  return;
  }
  bo_va = amdgpu_vm_bo_find(vm, bo);
  if (bo_va) {
  if (--bo_va->ref_count == 0) {
  amdgpu_vm_bo_rmv(adev, bo_va);
+
+r = amdgpu_vm_clear_freed(adev, vm, &fence);
+if (unlikely(r)) {
+dev_err(adev->dev, "failed to clear page "
+"tables on GEM object close (%d)\n", r);
+}
+
+if (fence) {


I think it's always true.
Maybe you mean *fence?


My fault, I picked the wrong mail thread at the same time.
It's already defined as a pointer here, not as **fence.

Reviewed-by: Junwei Zhang 




+amdgpu_bo_fence(bo, fence, true);
+fence_put(fence);
+}
  }
  }
  ttm_eu_backoff_reservation(&ticket, &list);
  }

  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
  {
  if (r == -EDEADLK) {
  r = amdgpu_gpu_reset(adev);
  if (!r)




Re: [PATCH v2 2/2] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Zhang, Jerry (Junwei)

On 03/24/2017 03:27 AM, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

v2: use amdgpu_bo_fence directly

Signed-off-by: Nicolai Hähnle 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 
  1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4a53c43..8b0f5f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;

struct amdgpu_bo_list_entry vm_pd;
struct list_head list, duplicates;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct fence *fence = NULL;
int r;

INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);

tv.bo = &bo->tbo;
tv.shared = true;
list_add(&tv.head, &list);

amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,20 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+   r = amdgpu_vm_clear_freed(adev, vm, &fence);
+   if (unlikely(r)) {
+   dev_err(adev->dev, "failed to clear page "
+   "tables on GEM object close (%d)\n", r);
+   }
+
+   if (fence) {


I think it's always true.
Maybe you mean *fence?


+   amdgpu_bo_fence(bo, fence, true);
+   fence_put(fence);
+   }
}
}
ttm_eu_backoff_reservation(&ticket, &list);
  }

  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
  {
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)




Re: [PATCH v2 1/2] drm/amdgpu: add optional fence out-parameter to amdgpu_vm_clear_freed

2017-03-23 Thread Zhang, Jerry (Junwei)

On 03/24/2017 10:30 AM, zhoucm1 wrote:



On 2017-03-24 03:27, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

We will add the fence to freed buffer objects in a later commit, to ensure
that the underlying memory can only be re-used after all references in
page tables have been cleared.

Signed-off-by: Nicolai Hähnle 

Reviewed-by: Chunming Zhou 

Reviewed-by: Junwei Zhang 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 21 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
  int i, r;
  r = amdgpu_vm_update_page_directory(adev, vm);
  if (r)
  return r;
  r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
  if (r)
  return r;
-r = amdgpu_vm_clear_freed(adev, vm);
+r = amdgpu_vm_clear_freed(adev, vm, NULL);
  if (r)
  return r;
  r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
  if (r)
  return r;
  r = amdgpu_sync_fence(adev, &p->job->sync,
fpriv->prt_va->last_pt_update);
  if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..4a53c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -535,21 +535,21 @@ static void amdgpu_gem_va_update_vm(struct
amdgpu_device *adev,
  r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
NULL);
  if (r)
  goto error;
  r = amdgpu_vm_update_page_directory(adev, vm);
  if (r)
  goto error;
-r = amdgpu_vm_clear_freed(adev, vm);
+r = amdgpu_vm_clear_freed(adev, vm, NULL);
  if (r)
  goto error;
  if (operation == AMDGPU_VA_OP_MAP ||
  operation == AMDGPU_VA_OP_REPLACE)
  r = amdgpu_vm_bo_update(adev, bo_va, false);
  error:
  if (r && r != -ERESTARTSYS)
  DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd7df45..2c95a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1397,48 +1397,57 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device
*adev, struct amdgpu_vm *vm)
  }
  kfree(shared);
  }
  /**
   * amdgpu_vm_clear_freed - clear freed BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
   *
   * Make sure all freed BOs are cleared in the PT.
   * Returns 0 for success.
   *
   * PTs have to be reserved and mutex must be locked!
   */
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-  struct amdgpu_vm *vm)
+  struct amdgpu_vm *vm,
+  struct fence **fence)
  {
  struct amdgpu_bo_va_mapping *mapping;
-struct fence *fence = NULL;
+struct fence *f = NULL;
  int r;
  while (!list_empty(&vm->freed)) {
  mapping = list_first_entry(&vm->freed,
  struct amdgpu_bo_va_mapping, list);
  list_del(&mapping->list);
  r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
-   0, 0, &fence);
-amdgpu_vm_free_mapping(adev, vm, mapping, fence);
+   0, 0, &f);
+amdgpu_vm_free_mapping(adev, vm, mapping, f);
  if (r) {
-fence_put(fence);
+fence_put(f);
  return r;
  }
+}
+if (fence && f) {
+fence_put(*fence);
+*fence = f;
+} else {
+fence_put(f);
  }
-fence_put(fence);
+
  return 0;
  }
  /**
   * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff10fa5..9d5a572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -187,21 +187,22 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
  struct amdgpu_vm *vm,
  uint64_t saddr, uint64_t size);
  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct fence *fence,
struct amdgpu_job *job);
  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
  void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
  int amdgpu_vm_update_page_directory

[PATCH 07/18] drm/amdgpu: handle multi level PD in the LRU

2017-03-23 Thread Chunming Zhou
From: Christian König 

Move all levels to the end after command submission.

Change-Id: I6d41aac90be29476780b897cf5943a2261580a78
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 +-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 23674ed..fe3db17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -199,28 +199,46 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
 }
 
 /**
- * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
+ * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail
  *
  * @adev: amdgpu device instance
  * @vm: vm providing the BOs
  *
  * Move the PT BOs to the tail of the LRU.
  */
-void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent)
 {
-   struct ttm_bo_global *glob = adev->mman.bdev.glob;
unsigned i;
 
-   spin_lock(&glob->lru_lock);
-   for (i = 0; i <= vm->root.last_entry_used; ++i) {
-   struct amdgpu_bo *bo = vm->root.entries[i].bo;
+   if (!parent->entries)
+   return;
 
-   if (!bo)
+   for (i = 0; i <= parent->last_entry_used; ++i) {
+   struct amdgpu_vm_pt *entry = &parent->entries[i];
+
+   if (!entry->bo)
continue;
 
-   ttm_bo_move_to_lru_tail(&bo->tbo);
+   ttm_bo_move_to_lru_tail(&entry->bo->tbo);
+   amdgpu_vm_move_level_in_lru(entry);
}
+}
+
+/**
+ * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
+ *
+ * @adev: amdgpu device instance
+ * @vm: vm providing the BOs
+ *
+ * Move the PT BOs to the tail of the LRU.
+ */
+void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+   struct ttm_bo_global *glob = adev->mman.bdev.glob;
+
+   spin_lock(&glob->lru_lock);
+   amdgpu_vm_move_level_in_lru(&vm->root);
spin_unlock(&glob->lru_lock);
 }
 
-- 
1.9.1



[PATCH 05/18] drm/amdgpu: handle multi level PD size calculation

2017-03-23 Thread Chunming Zhou
From: Christian König 

Allows us to get the size for all levels as well.

Change-Id: Iaf2f9b2bf19c3623018a2215f8cf01a61bdbe8ea
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9172954..90494ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -76,27 +76,37 @@ struct amdgpu_pte_update_params {
 };
 
 /**
- * amdgpu_vm_num_pde - return the number of page directory entries
+ * amdgpu_vm_num_entries - return the number of entries in a PD/PT
  *
  * @adev: amdgpu_device pointer
  *
- * Calculate the number of page directory entries.
+ * Calculate the number of entries in a page directory or page table.
  */
-static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
+static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
+ unsigned level)
 {
-   return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
+   if (level == 0)
+   /* For the root directory */
+   return adev->vm_manager.max_pfn >>
+   (amdgpu_vm_block_size * adev->vm_manager.num_level);
+   else if (level == adev->vm_manager.num_level)
+   /* For the page tables on the leaves */
+   return AMDGPU_VM_PTE_COUNT;
+   else
+   /* Everything in between */
+   return 1 << amdgpu_vm_block_size;
 }
 
 /**
- * amdgpu_vm_directory_size - returns the size of the page directory in bytes
+ * amdgpu_vm_bo_size - returns the size of the BOs in bytes
  *
  * @adev: amdgpu_device pointer
  *
- * Calculate the size of the page directory in bytes.
+ * Calculate the size of the BO for a page directory or page table in bytes.
  */
-static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
+static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 {
-   return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
+   return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
 /**
@@ -1393,7 +1403,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
saddr >>= amdgpu_vm_block_size;
eaddr >>= amdgpu_vm_block_size;
 
-   BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));
+   BUG_ON(eaddr >= amdgpu_vm_num_entries(adev, 0));
 
if (eaddr > vm->root.last_entry_used)
vm->root.last_entry_used = eaddr;
@@ -1576,8 +1586,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
 
-   pd_size = amdgpu_vm_directory_size(adev);
-   pd_entries = amdgpu_vm_num_pdes(adev);
+   pd_size = amdgpu_vm_bo_size(adev, 0);
+   pd_entries = amdgpu_vm_num_entries(adev, 0);
 
/* allocate page table array */
vm->root.entries = drm_calloc_large(pd_entries,
@@ -1662,7 +1672,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
kfree(mapping);
}
 
-   for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
+   for (i = 0; i < amdgpu_vm_num_entries(adev, 0); i++) {
struct amdgpu_bo *pt = vm->root.entries[i].bo;
 
if (!pt)
-- 
1.9.1
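
As a worked example of the new helpers in the patch above (a hedged
illustration only; the numbers assume the amdgpu_vm_block_size = 9,
num_level = 3 and amdgpu_vm_size = 1024 configuration that patches 12 and 18
of this series switch to, and AMDGPU_VM_PTE_COUNT is taken to be 512):

/* Hedged illustration, not part of the patch: with amdgpu_vm_block_size = 9,
 * adev->vm_manager.num_level = 3 and max_pfn = 1 << 28 (1 TB of 4 KB GPU
 * pages):
 *
 *   amdgpu_vm_num_entries(adev, 0) = max_pfn >> (9 * 3) = 2     root PD
 *   amdgpu_vm_num_entries(adev, 1) = 1 << 9             = 512   intermediate PD
 *   amdgpu_vm_num_entries(adev, 2) = 1 << 9             = 512   intermediate PD
 *   amdgpu_vm_num_entries(adev, 3) = AMDGPU_VM_PTE_COUNT = 512  leaf PT (assumed)
 *
 * amdgpu_vm_bo_size(adev, level) is then just that entry count * 8 bytes,
 * rounded up to a GPU page, so every level's BO fits in a single 4 KB page
 * in this configuration.
 */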



[PATCH 00/18] *** multiple level VMPT enablement ***

2017-03-23 Thread Chunming Zhou
Starting with Vega, ASICs support multi-level VM page tables (VMPT); this
series implements that support.

Tested successfully with 2/3/4 levels.
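
For readers following the address math in the patches of this series, here
is a hedged sketch of how a GPU page frame number is split into per-level
directory indices; the standalone helper is made up for illustration and
mirrors the shift/modulo logic used in the alloc/update patches:

/* Illustration only; block_size is amdgpu_vm_block_size and num_level is
 * adev->vm_manager.num_level as used throughout this series. */
static unsigned vmpt_entry_index(uint64_t pfn, unsigned level,
                                 unsigned block_size, unsigned num_level)
{
        /* every level below the current one consumes block_size bits */
        unsigned shift = (num_level - level) * block_size;
        uint64_t idx = pfn >> shift;

        /* the root keeps all remaining bits; lower levels keep only the
         * block_size bits that select one entry in that PD/PT */
        if (level > 0)
                idx %= 1ULL << block_size;

        return (unsigned)idx;
}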

Christian König (10):
  drm/amdgpu: rename page_directory_fence to last_dir_update
  drm/amdgpu: add the VM pointer to the amdgpu_pte_update_params as well
  drm/amdgpu: add num_level to the VM manager
  drm/amdgpu: generalize page table level
  drm/amdgpu: handle multi level PD size calculation
  drm/amdgpu: handle multi level PD during validation
  drm/amdgpu: handle multi level PD in the LRU
  drm/amdgpu: handle multi level PD updates
  drm/amdgpu: handle multi level PD during PT updates
  drm/amdgpu: add alloc/free for multi level PDs

Chunming Zhou (8):
  drm/amdgpu: set page table depth by num_level
  drm/amdgpu: block size of multiple level vmpt prefers one page
  drm/amdgpu: fix update sub levels
  drm/amdgpu: sub levels need to update regardless of parent updates
  drm/amdgpu: clear entries allocation
  drm/amdgpu: fix entries index calculation
  drm/amdgpu: need alloc sub level even parent bo was allocated
  drm/amdgpu: enable four level vmpt

 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 474 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  16 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|   1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|   1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|   1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|   1 +
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  |   2 +-
 11 files changed, 336 insertions(+), 175 deletions(-)

-- 
1.9.1



[PATCH 04/18] drm/amdgpu: generalize page table level

2017-03-23 Thread Chunming Zhou
From: Christian König 

No functional change, but the base for multi level page tables.

Change-Id: If5729be07e15cc8618ae7bce15c6b27aa4f24393
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 87 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  9 ++--
 3 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0e5d851..d9308cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -873,7 +873,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
}
 
if (p->job->vm) {
-   p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+   p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
 
r = amdgpu_bo_vm_update_pte(p, vm);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1f27300..9172954 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -113,9 +113,9 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 struct list_head *validated,
 struct amdgpu_bo_list_entry *entry)
 {
-   entry->robj = vm->page_directory;
+   entry->robj = vm->root.bo;
entry->priority = 0;
-   entry->tv.bo = &vm->page_directory->tbo;
+   entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
@@ -147,8 +147,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
return 0;
 
/* add the vm page table to the list */
-   for (i = 0; i <= vm->max_pde_used; ++i) {
-   struct amdgpu_bo *bo = vm->page_tables[i].bo;
+   for (i = 0; i <= vm->root.last_entry_used; ++i) {
+   struct amdgpu_bo *bo = vm->root.entries[i].bo;
 
if (!bo)
continue;
@@ -176,8 +176,8 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device 
*adev,
unsigned i;
 
spin_lock(&glob->lru_lock);
-   for (i = 0; i <= vm->max_pde_used; ++i) {
-   struct amdgpu_bo *bo = vm->page_tables[i].bo;
+   for (i = 0; i <= vm->root.last_entry_used; ++i) {
+   struct amdgpu_bo *bo = vm->root.entries[i].bo;
 
if (!bo)
continue;
@@ -597,15 +597,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
int r;
 
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
-   shadow = vm->page_directory->shadow;
+   shadow = vm->root.bo->shadow;
 
/* padding, etc. */
ndw = 64;
 
/* assume the worst case */
-   ndw += vm->max_pde_used * 6;
+   ndw += vm->root.last_entry_used * 6;
 
-   pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+   pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
if (shadow) {
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
@@ -625,8 +625,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
params.ib = &job->ibs[0];
 
/* walk over the address space and update the page directory */
-   for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
-   struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo;
+   for (pt_idx = 0; pt_idx <= vm->root.last_entry_used; ++pt_idx) {
+   struct amdgpu_bo *bo = vm->root.entries[pt_idx].bo;
uint64_t pde, pt;
 
if (bo == NULL)
@@ -642,10 +642,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
}
 
pt = amdgpu_bo_gpu_offset(bo);
-   if (vm->page_tables[pt_idx].addr == pt)
+   if (vm->root.entries[pt_idx].addr == pt)
continue;
 
-   vm->page_tables[pt_idx].addr = pt;
+   vm->root.entries[pt_idx].addr = pt;
 
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
@@ -680,7 +680,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
if (count) {
uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
 
-   if (vm->page_directory->shadow)
+   if (vm->root.bo->shadow)
amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr,
  count, incr, AMDGPU_PTE_VALID);
 
@@ -694,7 +694,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
}
 
amdgpu_ring_pad_ib(ring, params.ib);
-   amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+   amdgpu_sync_resv(adev, &job->sync, vm->
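
The amdgpu_vm.h half of this patch is cut off by the archive. Judging from
the fields used above (vm->root.bo, root.entries[i].bo/addr,
root.last_entry_used), the generalized level structure presumably looks
roughly like this reconstruction (not the verbatim hunk):

/* Reconstruction for readability, not the verbatim amdgpu_vm.h change. */
struct amdgpu_vm_pt {
        struct amdgpu_bo        *bo;
        uint64_t                addr;

        /* array of sub-structures, one per directory entry */
        struct amdgpu_vm_pt     *entries;
        unsigned                last_entry_used;
};

struct amdgpu_vm {
        /* ... other members unchanged ... */

        /* contains the page directory (replaces page_directory/page_tables) */
        struct amdgpu_vm_pt     root;
};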

[PATCH 17/18] drm/amdgpu: need alloc sub level even parent bo was allocated

2017-03-23 Thread Chunming Zhou
Change-Id: Ia7b256c298f9d4fb522d0add585961ac789ab80f
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 38 +-
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8ccc6a4..7f54502 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1476,27 +1476,27 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
struct amdgpu_bo *pt;
 
-   if (entry->bo)
-   continue;
-
-   r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level),
-AMDGPU_GPU_PAGE_SIZE, true,
-AMDGPU_GEM_DOMAIN_VRAM,
-AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-AMDGPU_GEM_CREATE_SHADOW |
-AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-AMDGPU_GEM_CREATE_VRAM_CLEARED,
-NULL, resv, &pt);
-   if (r)
-   return r;
+   if (!entry->bo) {
+   r = amdgpu_bo_create(adev,
+amdgpu_vm_bo_size(adev, level),
+AMDGPU_GPU_PAGE_SIZE, true,
+AMDGPU_GEM_DOMAIN_VRAM,
+AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+AMDGPU_GEM_CREATE_SHADOW |
+AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+AMDGPU_GEM_CREATE_VRAM_CLEARED,
+NULL, resv, &pt);
+   if (r)
+   return r;
 
-   /* Keep a reference to the root directory to avoid
-* freeing them up in the wrong order.
-*/
-   pt->parent = amdgpu_bo_ref(vm->root.bo);
+   /* Keep a reference to the root directory to avoid
+   * freeing them up in the wrong order.
+   */
+   pt->parent = amdgpu_bo_ref(vm->root.bo);
 
-   entry->bo = pt;
-   entry->addr = 0;
+   entry->bo = pt;
+   entry->addr = 0;
+   }
 
if (level < adev->vm_manager.num_level) {
r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
-- 
1.9.1



[PATCH 16/18] drm/amdgpu: fix entries index calculation

2017-03-23 Thread Chunming Zhou
Change-Id: I3e7eabc3576af4265f8cf58187ebe6cc62503ac6
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9721552..8ccc6a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1462,8 +1462,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
}
 
-   from = saddr >> shift;
-   to = eaddr >> shift;
+   from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
+   to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
 
if (to > parent->last_entry_used)
parent->last_entry_used = to;
-- 
1.9.1



[PATCH 15/18] drm/amdgpu: clear entries allocation

2017-03-23 Thread Chunming Zhou
Change-Id: If37d906b7ada95087f02e2dffdd81c78a0146c83
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3d2159e..9721552 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1459,6 +1459,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
   sizeof(struct amdgpu_vm_pt));
if (!parent->entries)
return -ENOMEM;
+   memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
}
 
from = saddr >> shift;
-- 
1.9.1



[PATCH 12/18] drm/amdgpu: block size of multiple level vmpt prefers one page

2017-03-23 Thread Chunming Zhou
A multi-level VMPT covers a bigger VA space; if the VMPT has 4 levels, one
top-level PDE covers 1TB, so increase the VM size to 1TB.

Change-Id: Ieaf844a8b364892a5a3f3e43e1690b9579b40b85
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d74a406..f77b0ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -82,8 +82,8 @@
 unsigned amdgpu_ip_block_mask = 0x;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
-int amdgpu_vm_size = 64;
-int amdgpu_vm_block_size = -1;
+int amdgpu_vm_size = 1024;
+int amdgpu_vm_block_size = 9;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_vram_page_split = 1024;
-- 
1.9.1



[PATCH 18/18] drm/amdgpu: enable four level vmpt

2017-03-23 Thread Chunming Zhou
Change-Id: Ibd9ce30183f77b977e90ce40243d054037b7668b
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6625a2f..4178250 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -508,7 +508,7 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 * amdkfd will use VMIDs 8-15
 */
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
-   adev->vm_manager.num_level = 1;
+   adev->vm_manager.num_level = 3;
amdgpu_vm_manager_init(adev);
 
return 0;
-- 
1.9.1



[PATCH 13/18] drm/amdgpu: fix update sub levels

2017-03-23 Thread Chunming Zhou
Sub-level PDEs are always written to an incorrect place.

Change-Id: Ic40bbf2affa9cdd0490f8d7dde883cab36bd60fc
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 71f90e5..5c51815 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -661,7 +661,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device 
*adev,
/* assume the worst case */
ndw += parent->last_entry_used * 6;
 
-   pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+   pd_addr = amdgpu_bo_gpu_offset(parent->bo);
 
shadow = parent->bo->shadow;
if (shadow) {
-- 
1.9.1



[PATCH 14/18] drm/amdgpu: sub levels need to update regardless of parent updates

2017-03-23 Thread Chunming Zhou
Change-Id: I4373d50e243c20a9f7d00134406496de5f482c44
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5c51815..3d2159e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -748,27 +748,25 @@ static int amdgpu_vm_update_level(struct amdgpu_device 
*adev,
 
if (params.ib->length_dw == 0) {
amdgpu_job_free(job);
-   return 0;
-   }
-
-   amdgpu_ring_pad_ib(ring, params.ib);
-   amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
-AMDGPU_FENCE_OWNER_VM);
-   if (shadow)
-   amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+   } else {
+   amdgpu_ring_pad_ib(ring, params.ib);
+   amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
 AMDGPU_FENCE_OWNER_VM);
+   if (shadow)
+   amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+AMDGPU_FENCE_OWNER_VM);
 
-   WARN_ON(params.ib->length_dw > ndw);
-   r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &fence);
-   if (r)
-   goto error_free;
-
-   amdgpu_bo_fence(parent->bo, fence, true);
-   fence_put(vm->last_dir_update);
-   vm->last_dir_update = fence_get(fence);
-   fence_put(fence);
+   WARN_ON(params.ib->length_dw > ndw);
+   r = amdgpu_job_submit(job, ring, &vm->entity,
+   AMDGPU_FENCE_OWNER_VM, &fence);
+   if (r)
+   goto error_free;
 
+   amdgpu_bo_fence(parent->bo, fence, true);
+   fence_put(vm->last_dir_update);
+   vm->last_dir_update = fence_get(fence);
+   fence_put(fence);
+   }
/*
 * Recurse into the subdirectories. This recursion is harmless because
 * we only have a maximum of 5 layers.
-- 
1.9.1



[PATCH 11/18] drm/amdgpu: set page table depth by num_level

2017-03-23 Thread Chunming Zhou
Change-Id: I6180bedb8948398429fb32b36faa35960b3b85e6
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index a47f9dc..3a6f50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -200,7 +200,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
for (i = 0; i <= 14; i++) {
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
-   tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
+   tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+   adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 01f3aa5..07af98c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -218,7 +218,7 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-   PAGE_TABLE_DEPTH, 1);
+   PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-- 
1.9.1



[PATCH 10/18] drm/amdgpu: add alloc/free for multi level PDs

2017-03-23 Thread Chunming Zhou
From: Christian König 

Allocate and free page directories on demand.

Change-Id: I341b72b911377033257af888dd1a96ca54f586e9
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 178 -
 1 file changed, 107 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 43747f8..71f90e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1433,6 +1433,83 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
amdgpu_device *adev,
return bo_va;
 }
 
+ /**
+ * amdgpu_vm_alloc_levels - allocate the PD/PT levels
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @saddr: start of the address range
+ * @eaddr: end of the address range
+ *
+ * Make sure the page directories and page tables are allocated
+ */
+static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt *parent,
+ uint64_t saddr, uint64_t eaddr,
+ unsigned level)
+{
+   unsigned shift = (adev->vm_manager.num_level - level) *
+   amdgpu_vm_block_size;
+   unsigned pt_idx, from, to;
+   int r;
+
+   if (!parent->entries) {
+   unsigned num_entries = amdgpu_vm_num_entries(adev, level);
+
+   parent->entries = drm_calloc_large(num_entries,
+  sizeof(struct amdgpu_vm_pt));
+   if (!parent->entries)
+   return -ENOMEM;
+   }
+
+   from = saddr >> shift;
+   to = eaddr >> shift;
+
+   if (to > parent->last_entry_used)
+   parent->last_entry_used = to;
+
+   ++level;
+
+   /* walk over the address space and allocate the page tables */
+   for (pt_idx = from; pt_idx <= to; ++pt_idx) {
+   struct reservation_object *resv = vm->root.bo->tbo.resv;
+   struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+   struct amdgpu_bo *pt;
+
+   if (entry->bo)
+   continue;
+
+   r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level),
+AMDGPU_GPU_PAGE_SIZE, true,
+AMDGPU_GEM_DOMAIN_VRAM,
+AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+AMDGPU_GEM_CREATE_SHADOW |
+AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+AMDGPU_GEM_CREATE_VRAM_CLEARED,
+NULL, resv, &pt);
+   if (r)
+   return r;
+
+   /* Keep a reference to the root directory to avoid
+* freeing them up in the wrong order.
+*/
+   pt->parent = amdgpu_bo_ref(vm->root.bo);
+
+   entry->bo = pt;
+   entry->addr = 0;
+
+   if (level < adev->vm_manager.num_level) {
+   r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
+  eaddr, level);
+   if (r)
+   return r;
+   }
+   }
+
+   return 0;
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -1455,7 +1532,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->vm;
struct interval_tree_node *it;
-   unsigned last_pfn, pt_idx;
+   unsigned last_pfn;
uint64_t eaddr;
int r;
 
@@ -1506,46 +1583,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
interval_tree_insert(&mapping->it, &vm->va);
 
-   /* Make sure the page tables are allocated */
-   saddr >>= amdgpu_vm_block_size;
-   eaddr >>= amdgpu_vm_block_size;
-
-   BUG_ON(eaddr >= amdgpu_vm_num_entries(adev, 0));
-
-   if (eaddr > vm->root.last_entry_used)
-   vm->root.last_entry_used = eaddr;
-
-   /* walk over the address space and allocate the page tables */
-   for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
-   struct reservation_object *resv = vm->root.bo->tbo.resv;
-   struct amdgpu_bo *pt;
-
-   if (vm->root.entries[pt_idx].bo)
-   continue;
-
-   r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
-AMDGPU_GPU_PAGE_SIZE, true,
-AMDGPU_GEM_DOMAIN_VRAM,
-AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-AMDGPU_GEM_CREATE_SHADOW |
-AMDGPU_GEM_CREATE_VRAM_CONTIG
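
The remainder of this patch is truncated by the archive, so the new call
site of amdgpu_vm_alloc_levels() is not visible here. Whatever the exact
location, the entry point presumably takes roughly the following shape
(a hedged sketch, not the verbatim hunk):

/* saddr/eaddr are already in GPU page units at this point; level 0 starts
 * the recursion at the root directory (error unwinding elided). */
r = amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
if (r)
        return r;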

[PATCH 08/18] drm/amdgpu: handle multi level PD updates

2017-03-23 Thread Chunming Zhou
From: Christian König 

Update all levels of the page directory.

Change-Id: I0ce3fc1fd88397aedf693b0b6e2efb2db704e615
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 67 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  4 +-
 4 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d9308cf..de1c4c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -787,7 +787,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser 
*p,
struct amdgpu_bo *bo;
int i, r;
 
-   r = amdgpu_vm_update_page_directory(adev, vm);
+   r = amdgpu_vm_update_directories(adev, vm);
if (r)
return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 48ab967..008b8ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -691,7 +691,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
if (r)
goto error;
 
-   r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+   r = amdgpu_vm_update_directories(adev, bo_va->vm);
if (r)
goto error;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fe3db17..271cd2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -625,24 +625,24 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t 
*pages_addr, uint64_t addr)
 }
 
 /*
- * amdgpu_vm_update_pdes - make sure that page directory is valid
+ * amdgpu_vm_update_level - update a single level in the hierarchy
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
+ * @parent: parent directory
  *
- * Allocates new page tables if necessary
- * and updates the page directory.
+ * Makes sure all entries in @parent are up to date.
  * Returns 0 for success, error for failure.
  */
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
-   struct amdgpu_vm *vm)
+static int amdgpu_vm_update_level(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt *parent,
+ unsigned level)
 {
struct amdgpu_bo *shadow;
struct amdgpu_ring *ring;
uint64_t pd_addr, shadow_addr;
-   uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
+   uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
unsigned count = 0, pt_idx, ndw;
struct amdgpu_job *job;
@@ -651,16 +651,19 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
 
int r;
 
+   if (!parent->entries)
+   return 0;
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
-   shadow = vm->root.bo->shadow;
 
/* padding, etc. */
ndw = 64;
 
/* assume the worst case */
-   ndw += vm->root.last_entry_used * 6;
+   ndw += parent->last_entry_used * 6;
 
pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+
+   shadow = parent->bo->shadow;
if (shadow) {
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
@@ -679,9 +682,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
params.adev = adev;
params.ib = &job->ibs[0];
 
-   /* walk over the address space and update the page directory */
-   for (pt_idx = 0; pt_idx <= vm->root.last_entry_used; ++pt_idx) {
-   struct amdgpu_bo *bo = vm->root.entries[pt_idx].bo;
+   /* walk over the address space and update the directory */
+   for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+   struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
uint64_t pde, pt;
 
if (bo == NULL)
@@ -697,10 +700,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
}
 
pt = amdgpu_bo_gpu_offset(bo);
-   if (vm->root.entries[pt_idx].addr == pt)
+   if (parent->entries[pt_idx].addr == pt)
continue;
 
-   vm->root.entries[pt_idx].addr = pt;
+   parent->entries[pt_idx].addr = pt;
 
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
@@ -749,7 +752,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
}
 
amdgpu_ring_pad_ib(ring, params.ib);
-   amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.

[PATCH 06/18] drm/amdgpu: handle multi level PD during validation

2017-03-23 Thread Chunming Zhou
From: Christian König 

All page directory levels should be in place after this.

Change-Id: Ied101d6e14676acc07fe2d46ecba4563007b5045
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 57 +-
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 90494ce..23674ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -132,6 +132,47 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 }
 
 /**
+ * amdgpu_vm_validate_level - validate a single page table level
+ *
+ * @parent: parent page table level
+ * @validate: callback to do the validation
+ * @param: parameter for the validation callback
+ *
+ * Validate the page table BOs on command submission if necessary.
+ */
+static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
+   int (*validate)(void *, struct amdgpu_bo *),
+   void *param)
+{
+   unsigned i;
+   int r;
+
+   if (!parent->entries)
+   return 0;
+
+   for (i = 0; i <= parent->last_entry_used; ++i) {
+   struct amdgpu_vm_pt *entry = &parent->entries[i];
+
+   if (!entry->bo)
+   continue;
+
+   r = validate(param, entry->bo);
+   if (r)
+   return r;
+
+   /*
+* Recurse into the sub directory. This is harmless because we
+* have only a maximum of 5 layers.
+*/
+   r = amdgpu_vm_validate_level(entry, validate, param);
+   if (r)
+   return r;
+   }
+
+   return r;
+}
+
+/**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
  * @adev: amdgpu device pointer
@@ -146,8 +187,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  void *param)
 {
uint64_t num_evictions;
-   unsigned i;
-   int r;
 
/* We only need to validate the page tables
 * if they aren't already valid.
@@ -156,19 +195,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (num_evictions == vm->last_eviction_counter)
return 0;
 
-   /* add the vm page table to the list */
-   for (i = 0; i <= vm->root.last_entry_used; ++i) {
-   struct amdgpu_bo *bo = vm->root.entries[i].bo;
-
-   if (!bo)
-   continue;
-
-   r = validate(param, bo);
-   if (r)
-   return r;
-   }
-
-   return 0;
+   return amdgpu_vm_validate_level(&vm->root, validate, param);
 }
 
 /**
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/18] drm/amdgpu: handle multi level PD during PT updates

2017-03-23 Thread Chunming Zhou
From: Christian König 

Not the best solution, but good enough for now.

Change-Id: I45ac1a9d8513ebe51bce9a276da39ddf3524b058
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 271cd2b..43747f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -807,6 +807,32 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
 }
 
 /**
+ * amdgpu_vm_find_pt - find the page table for an address
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @addr: virtual address in question
+ *
+ * Find the page table BO for a virtual address, return NULL when none found.
+ */
+static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
+ uint64_t addr)
+{
+   struct amdgpu_vm_pt *entry = &p->vm->root;
+   unsigned idx, level = p->adev->vm_manager.num_level;
+
+   while (entry->entries) {
+   idx = addr >> (amdgpu_vm_block_size * level--);
+   idx %= amdgpu_bo_size(entry->bo) / 8;
+   entry = &entry->entries[idx];
+   }
+
+   if (level)
+   return NULL;
+
+   return entry->bo;
+}
+
+/**
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @params: see amdgpu_pte_update_params definition
@@ -826,15 +852,16 @@ static void amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
 
uint64_t cur_pe_start, cur_nptes, cur_dst;
uint64_t addr; /* next GPU address to be updated */
-   uint64_t pt_idx;
struct amdgpu_bo *pt;
unsigned nptes; /* next number of ptes to be updated */
uint64_t next_pe_start;
 
/* initialize the variables */
addr = start;
-   pt_idx = addr >> amdgpu_vm_block_size;
-   pt = params->vm->root.entries[pt_idx].bo;
+   pt = amdgpu_vm_get_pt(params, addr);
+   if (!pt)
+   return;
+
if (params->shadow) {
if (!pt->shadow)
return;
@@ -856,8 +883,10 @@ static void amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
 
/* walk over the address space and update the page tables */
while (addr < end) {
-   pt_idx = addr >> amdgpu_vm_block_size;
-   pt = params->vm->root.entries[pt_idx].bo;
+   pt = amdgpu_vm_get_pt(params, addr);
+   if (!pt)
+   return;
+
if (params->shadow) {
if (!pt->shadow)
return;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 03/18] drm/amdgpu: add num_level to the VM manager

2017-03-23 Thread Chunming Zhou
From: Christian König 

Needs to be filled with handling.

Change-Id: I04881a2b304a020c259ce85e94b12900a77f1c02
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 1 +
 5 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6be6c71..e208186f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -151,6 +151,7 @@ struct amdgpu_vm_manager {
unsignedseqno[AMDGPU_MAX_RINGS];
 
uint32_tmax_pfn;
+   uint32_tnum_level;
/* vram base address for page table entry  */
u64 vram_base_offset;
/* is vm enabled? */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 7155ae5..0ce0d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -607,6 +607,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
 * amdkfd will use VMIDs 8-15
 */
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+   adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
 
/* base offset of vram pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index ff4cc63..f90dba5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -734,6 +734,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
 * amdkfd will use VMIDs 8-15
 */
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+   adev->vm_manager.num_level = 1;
adev->vm_manager.shared_aperture_start = 0x2000ULL;
adev->vm_manager.shared_aperture_end =
adev->vm_manager.shared_aperture_start + (4ULL << 30) - 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d7d025a..fe79328 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -865,6 +865,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
 * amdkfd will use VMIDs 8-15
 */
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+   adev->vm_manager.num_level = 1;
adev->vm_manager.shared_aperture_start = 0x2000ULL;
adev->vm_manager.shared_aperture_end =
adev->vm_manager.shared_aperture_start + (4ULL << 30) - 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 58557add8..6625a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -508,6 +508,7 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 * amdkfd will use VMIDs 8-15
 */
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+   adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
 
return 0;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 01/18] drm/amdgpu: rename page_directory_fence to last_dir_update

2017-03-23 Thread Chunming Zhou
From: Christian König 

Describes better what this is used for.

Change-Id: I1bd496522fbdd6531d2c1d17434822b53bec06d0
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f225d63..0e5d851 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -791,7 +791,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser 
*p,
if (r)
return r;
 
-   r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
+   r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
if (r)
return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 01418c8..66f5b91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -705,8 +705,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
goto error_free;
 
amdgpu_bo_fence(vm->page_directory, fence, true);
-   fence_put(vm->page_directory_fence);
-   vm->page_directory_fence = fence_get(fence);
+   fence_put(vm->last_dir_update);
+   vm->last_dir_update = fence_get(fence);
fence_put(fence);
 
return 0;
@@ -1596,7 +1596,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
if (r)
goto err;
 
-   vm->page_directory_fence = NULL;
+   vm->last_dir_update = NULL;
 
r = amdgpu_bo_create(adev, pd_size, align, true,
 AMDGPU_GEM_DOMAIN_VRAM,
@@ -1673,7 +1673,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
 
amdgpu_bo_unref(&vm->page_directory->shadow);
amdgpu_bo_unref(&vm->page_directory);
-   fence_put(vm->page_directory_fence);
+   fence_put(vm->last_dir_update);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1a7922b..6be6c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -97,7 +97,7 @@ struct amdgpu_vm {
/* contains the page directory */
struct amdgpu_bo*page_directory;
unsignedmax_pde_used;
-   struct fence*page_directory_fence;
+   struct fence*last_dir_update;
uint64_tlast_eviction_counter;
 
/* array of page tables, one for each page directory entry */
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 02/18] drm/amdgpu: add the VM pointer to the amdgpu_pte_update_params as well

2017-03-23 Thread Chunming Zhou
From: Christian König 

This way we avoid having to pass it through all the different functions.

Change-Id: Id94564a70d106b0ef36c7f45de2b25ca176db2d2
Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 66f5b91..1f27300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -61,6 +61,8 @@
 struct amdgpu_pte_update_params {
/* amdgpu device we do this update for */
struct amdgpu_device *adev;
+   /* optional amdgpu_vm we do this update for */
+   struct amdgpu_vm *vm;
/* address where to copy page table entries from */
uint64_t src;
/* indirect buffer to fill with commands */
@@ -729,7 +731,6 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device 
*adev,
  * Update the page tables in the range @start - @end.
  */
 static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
- struct amdgpu_vm *vm,
  uint64_t start, uint64_t end,
  uint64_t dst, uint64_t flags)
 {
@@ -745,7 +746,7 @@ static void amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
/* initialize the variables */
addr = start;
pt_idx = addr >> amdgpu_vm_block_size;
-   pt = vm->page_tables[pt_idx].bo;
+   pt = params->vm->page_tables[pt_idx].bo;
if (params->shadow) {
if (!pt->shadow)
return;
@@ -768,7 +769,7 @@ static void amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
/* walk over the address space and update the page tables */
while (addr < end) {
pt_idx = addr >> amdgpu_vm_block_size;
-   pt = vm->page_tables[pt_idx].bo;
+   pt = params->vm->page_tables[pt_idx].bo;
if (params->shadow) {
if (!pt->shadow)
return;
@@ -819,7 +820,6 @@ static void amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
  * @flags: hw mapping flags
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params*params,
-   struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
 {
@@ -853,25 +853,25 @@ static void amdgpu_vm_frag_ptes(struct 
amdgpu_pte_update_params   *params,
if (params->src || !(flags & AMDGPU_PTE_VALID) ||
(frag_start >= frag_end)) {
 
-   amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
+   amdgpu_vm_update_ptes(params, start, end, dst, flags);
return;
}
 
/* handle the 4K area at the beginning */
if (start != frag_start) {
-   amdgpu_vm_update_ptes(params, vm, start, frag_start,
+   amdgpu_vm_update_ptes(params, start, frag_start,
  dst, flags);
dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
}
 
/* handle the area in the middle */
-   amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
+   amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
  flags | frag_flags);
 
/* handle the 4K area at the end */
if (frag_end != end) {
dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-   amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
+   amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
}
 }
 
@@ -911,6 +911,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device 
*adev,
 
	memset(&params, 0, sizeof(params));
params.adev = adev;
+   params.vm = vm;
params.src = src;
 
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
@@ -992,9 +993,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device 
*adev,
goto error_free;
 
params.shadow = true;
-   amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
+   amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
	params.shadow = false;
-   amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
+   amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
 
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 2/2] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread zhoucm1



On 2017-03-24 03:27, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

v2: use amdgpu_bo_fence directly

Signed-off-by: Nicolai Hähnle 

Reviewed-by: Chunming Zhou 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 
  1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4a53c43..8b0f5f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
  
  	struct amdgpu_bo_list_entry vm_pd;

struct list_head list, duplicates;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct fence *fence = NULL;
int r;
  
  	INIT_LIST_HEAD(&list);

INIT_LIST_HEAD(&duplicates);
  
  	tv.bo = &bo->tbo;

tv.shared = true;
list_add(&tv.head, &list);
  
  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);

@@ -166,20 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+   r = amdgpu_vm_clear_freed(adev, vm, &fence);
+   if (unlikely(r)) {
+   dev_err(adev->dev, "failed to clear page "
+   "tables on GEM object close (%d)\n", r);
+   }
+
+   if (fence) {
+   amdgpu_bo_fence(bo, fence, true);
+   fence_put(fence);
+   }
}
}
ttm_eu_backoff_reservation(&ticket, &list);
  }
  
  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)

  {
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/2] drm/amdgpu: add optional fence out-parameter to amdgpu_vm_clear_freed

2017-03-23 Thread zhoucm1



On 2017-03-24 03:27, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

We will add the fence to freed buffer objects in a later commit, to ensure
that the underlying memory can only be re-used after all references in
page tables have been cleared.

Signed-off-by: Nicolai Hähnle 

Reviewed-by: Chunming Zhou 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 21 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct 
amdgpu_cs_parser *p)
int i, r;
  
  	r = amdgpu_vm_update_page_directory(adev, vm);

if (r)
return r;
  
  	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);

if (r)
return r;
  
-	r = amdgpu_vm_clear_freed(adev, vm);

+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
  
  	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);

if (r)
return r;
  
  	r = amdgpu_sync_fence(adev, &p->job->sync,

  fpriv->prt_va->last_pt_update);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..4a53c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -535,21 +535,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
  
  	r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,

  NULL);
if (r)
goto error;
  
  	r = amdgpu_vm_update_page_directory(adev, vm);

if (r)
goto error;
  
-	r = amdgpu_vm_clear_freed(adev, vm);

+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
  
  	if (operation == AMDGPU_VA_OP_MAP ||

operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
  
  error:

if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd7df45..2c95a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1397,48 +1397,57 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device 
*adev, struct amdgpu_vm *vm)
}
  
  	kfree(shared);

  }
  
  /**

   * amdgpu_vm_clear_freed - clear freed BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
   *
   * Make sure all freed BOs are cleared in the PT.
   * Returns 0 for success.
   *
   * PTs have to be reserved and mutex must be locked!
   */
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct fence **fence)
  {
struct amdgpu_bo_va_mapping *mapping;
-   struct fence *fence = NULL;
+   struct fence *f = NULL;
int r;
  
  	while (!list_empty(&vm->freed)) {

mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
  
  		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,

-  0, 0, &fence);
-   amdgpu_vm_free_mapping(adev, vm, mapping, fence);
+  0, 0, &f);
+   amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
-   fence_put(fence);
+   fence_put(f);
return r;
}
+   }
  
+	if (fence && f) {

+   fence_put(*fence);
+   *fence = f;
+   } else {
+   fence_put(f);
}
-   fence_put(fence);
+
return 0;
  
  }
  
  /**

   * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff10fa5..9d5a572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -187,21 +187,22 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size);
  int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,

RE: [PATCH 1/9] drm/amdgpu/gfx9: Switch baremetal to use KIQ for compute ring management. (v2)

2017-03-23 Thread Yu, Xiangliang
Reviewed-by: Xiangliang Yu  for the whole series.


> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Alex Deucher
> Sent: Friday, March 24, 2017 1:07 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH 1/9] drm/amdgpu/gfx9: Switch baremetal to use KIQ for
> compute ring management. (v2)
> 
> KIQ is the Kernel Interface Queue for managing the MEC.  Rather than setting
> up rings via direct MMIO of ring registers, the rings are configured via 
> special
> packets sent to the KIQ.  This allows the MEC to better manage shared
> resources and certain power events. It also reduces the code paths in the
> driver to support and is required for MEC powergating.
> 
> v2: drop gfx_v9_0_cp_compute_fini() as well
> 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 321 +++
> ---
>  1 file changed, 24 insertions(+), 297 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad82ab7..09a3710 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1094,23 +1094,21 @@ static int gfx_v9_0_sw_init(void *handle)
>   return r;
>   }
> 
> - if (amdgpu_sriov_vf(adev)) {
> - r = gfx_v9_0_kiq_init(adev);
> - if (r) {
> - DRM_ERROR("Failed to init KIQ BOs!\n");
> - return r;
> - }
> + r = gfx_v9_0_kiq_init(adev);
> + if (r) {
> + DRM_ERROR("Failed to init KIQ BOs!\n");
> + return r;
> + }
> 
> - kiq = &adev->gfx.kiq;
> - r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> - if (r)
> - return r;
> + kiq = &adev->gfx.kiq;
> + r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> + if (r)
> + return r;
> 
> - /* create MQD for all compute queues as wel as KIQ for
> SRIOV case */
> - r = gfx_v9_0_compute_mqd_soft_init(adev);
> - if (r)
> - return r;
> - }
> + /* create MQD for all compute queues as wel as KIQ for SRIOV case
> */
> + r = gfx_v9_0_compute_mqd_soft_init(adev);
> + if (r)
> + return r;
> 
>   /* reserve GDS, GWS and OA resource for gfx */
>   r = amdgpu_bo_create_kernel(adev, adev-
> >gds.mem.gfx_partition_size, @@ -1157,11 +1155,9 @@ static int
> gfx_v9_0_sw_fini(void *handle)
>   for (i = 0; i < adev->gfx.num_compute_rings; i++)
>   amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
> 
> - if (amdgpu_sriov_vf(adev)) {
> - gfx_v9_0_compute_mqd_soft_fini(adev);
> - gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev-
> >gfx.kiq.irq);
> - gfx_v9_0_kiq_fini(adev);
> - }
> + gfx_v9_0_compute_mqd_soft_fini(adev);
> + gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> + gfx_v9_0_kiq_fini(adev);
> 
>   gfx_v9_0_mec_fini(adev);
>   gfx_v9_0_ngg_fini(adev);
> @@ -1732,13 +1728,6 @@ static void gfx_v9_0_cp_compute_enable(struct
> amdgpu_device *adev, bool enable)
>   udelay(50);
>  }
> 
> -static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) -{
> - gfx_v9_0_cp_compute_enable(adev, true);
> -
> - return 0;
> -}
> -
>  static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device
> *adev)  {
>   const struct gfx_firmware_header_v1_0 *mec_hdr; @@ -1781,45
> +1770,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct
> amdgpu_device *adev)
>   return 0;
>  }
> 
> -static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) -{
> - int i, r;
> -
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> -
> - if (ring->mqd_obj) {
> - r = amdgpu_bo_reserve(ring->mqd_obj, false);
> - if (unlikely(r != 0))
> - dev_warn(adev->dev, "(%d) reserve MQD
> bo failed\n", r);
> -
> - amdgpu_bo_unpin(ring->mqd_obj);
> - amdgpu_bo_unreserve(ring->mqd_obj);
> -
> - amdgpu_bo_unref(&ring->mqd_obj);
> - ring->mqd_obj = NULL;
> - }
> - }
> -}
> -
> -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
> -
> -static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) -{
> - int i, r;
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
> - if (gfx_v9_0_init_queue(ring))
> - dev_warn(adev->dev, "compute queue %d init
> failed!\n", i);
> - }
> -
> - r = gfx_v9_0_cp_compute_start(adev);
> - if (r)
> - return r;
> -
> - return 0;
> -}
> -
>  /* KIQ functio

[PATCH v2 2/2] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
   buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
   now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

v2: use amdgpu_bo_fence directly

Signed-off-by: Nicolai Hähnle 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4a53c43..8b0f5f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
struct amdgpu_bo_list_entry vm_pd;
struct list_head list, duplicates;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct fence *fence = NULL;
int r;
 
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
 
tv.bo = &bo->tbo;
tv.shared = true;
list_add(&tv.head, &list);
 
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,20 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+   r = amdgpu_vm_clear_freed(adev, vm, &fence);
+   if (unlikely(r)) {
+   dev_err(adev->dev, "failed to clear page "
+   "tables on GEM object close (%d)\n", r);
+   }
+
+   if (fence) {
+   amdgpu_bo_fence(bo, fence, true);
+   fence_put(fence);
+   }
}
}
ttm_eu_backoff_reservation(&ticket, &list);
 }
 
 static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
 {
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 1/2] drm/amdgpu: add optional fence out-parameter to amdgpu_vm_clear_freed

2017-03-23 Thread Nicolai Hähnle
From: Nicolai Hähnle 

We will add the fence to freed buffer objects in a later commit, to ensure
that the underlying memory can only be re-used after all references in
page tables have been cleared.

Signed-off-by: Nicolai Hähnle 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 21 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct 
amdgpu_cs_parser *p)
int i, r;
 
r = amdgpu_vm_update_page_directory(adev, vm);
if (r)
return r;
 
r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
if (r)
return r;
 
-   r = amdgpu_vm_clear_freed(adev, vm);
+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
 
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
 
r = amdgpu_sync_fence(adev, &p->job->sync,
  fpriv->prt_va->last_pt_update);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..4a53c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -535,21 +535,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
 
r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
  NULL);
if (r)
goto error;
 
r = amdgpu_vm_update_page_directory(adev, vm);
if (r)
goto error;
 
-   r = amdgpu_vm_clear_freed(adev, vm);
+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
 
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
 
 error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd7df45..2c95a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1397,48 +1397,57 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device 
*adev, struct amdgpu_vm *vm)
}
 
kfree(shared);
 }
 
 /**
  * amdgpu_vm_clear_freed - clear freed BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
  *
  * Make sure all freed BOs are cleared in the PT.
  * Returns 0 for success.
  *
  * PTs have to be reserved and mutex must be locked!
  */
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct fence **fence)
 {
struct amdgpu_bo_va_mapping *mapping;
-   struct fence *fence = NULL;
+   struct fence *f = NULL;
int r;
 
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
 
r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
-  0, 0, &fence);
-   amdgpu_vm_free_mapping(adev, vm, mapping, fence);
+  0, 0, &f);
+   amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
-   fence_put(fence);
+   fence_put(f);
return r;
}
+   }
 
+   if (fence && f) {
+   fence_put(*fence);
+   *fence = f;
+   } else {
+   fence_put(f);
}
-   fence_put(fence);
+
return 0;
 
 }
 
 /**
  * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff10fa5..9d5a572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -187,21 +187,22 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  struct amdgpu_sync *sync, struct fence *fen

Re: [PATCH] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Nicolai Hähnle

On 23.03.2017 19:18, Christian König wrote:

Am 23.03.2017 um 18:22 schrieb Nicolai Hähnle:

From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.


First of all please split adding the fence parameter to
amdgpu_vm_clear_freed() into a separate patch.


Sure, I'll do that.



Not a must have, but that should make it easier to review.



Signed-off-by: Nicolai Hähnle 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 20 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
  int i, r;
r = amdgpu_vm_update_page_directory(adev, vm);
  if (r)
  return r;
r = amdgpu_sync_fence(adev, &p->job->sync,
vm->page_directory_fence);
  if (r)
  return r;
  -r = amdgpu_vm_clear_freed(adev, vm);
+r = amdgpu_vm_clear_freed(adev, vm, NULL);
  if (r)
  return r;
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
  if (r)
  return r;
r = amdgpu_sync_fence(adev, &p->job->sync,
fpriv->prt_va->last_pt_update);
  if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..bd2daef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct
drm_gem_object *obj,
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
  struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry vm_pd;
  struct list_head list, duplicates;
  struct ttm_validate_buffer tv;
  struct ww_acquire_ctx ticket;
  struct amdgpu_bo_va *bo_va;
+struct fence *fence = NULL;
  int r;
INIT_LIST_HEAD(&list);
  INIT_LIST_HEAD(&duplicates);
tv.bo = &bo->tbo;
  tv.shared = true;
  list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,23 +167,31 @@ void amdgpu_gem_object_close(struct
drm_gem_object *obj,
  r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
  if (r) {
  dev_err(adev->dev, "leaking bo va because "
  "we fail to reserve bo (%d)\n", r);
  return;
  }
  bo_va = amdgpu_vm_bo_find(vm, bo);
  if (bo_va) {
  if (--bo_va->ref_count == 0) {
  amdgpu_vm_bo_rmv(adev, bo_va);
+
+amdgpu_vm_clear_freed(adev, vm, &fence);
  }
  }
-ttm_eu_backoff_reservation(&ticket, &list);
+
+if (fence) {
+ttm_eu_fence_buffer_objects(&ticket, &list, fence);
+fence_put(fence);
+} else {
+ttm_eu_backoff_reservation(&ticket, &list);
+}


Ah, now I remember why we didn't do that before. We could run into
problems because amdgpu_gem_object_close() can't fail and adding this
needs memory.

Anyway, "tv.shared = true;" was there before. So your patch doesn't make
it any worse.

But I suggest to use amdgpu_bo_fence() instead of
ttm_eu_fence_buffer_objects(). This way we don't try to add the fence to
the page directory.


Just checked, the fence is added to the page directory anyway, in 
amdgpu_vm_bo_update_mapping. I think that's necessary to make sure 
subsequent CS submissions see the cleared page tables.


Anyway, it still makes sense to remove the call to 
ttm_eu_fence_buffer_objects here. That's more explicit about who is 
responsible for adding fences to what.


Thanks,
Nicolai



Apart from that the patch looks good to me,
Christian.


  }
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev,
int r)
  {
  if (r == -EDEADLK) {
  r = amdgpu_gpu_reset(adev);
  if (!r)
  r = -EAGAIN;
  }
  return r;
@@ -535,21 +544,21 @@ static void amdgpu_gem_va_update_vm(struct
amdgpu_device *adev,
r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_chec

Re: [PATCH 1/9] drm/amdgpu/gfx9: Switch baremetal to use KIQ for compute ring management. (v2)

2017-03-23 Thread Christian König

Am 23.03.2017 um 18:07 schrieb Alex Deucher:

KIQ is the Kernel Interface Queue for managing the MEC.  Rather than setting
up rings via direct MMIO of ring registers, the rings are configured via
special packets sent to the KIQ.  This allows the MEC to better manage shared
resources and certain power events. It also reduces the code paths in the
driver to support and is required for MEC powergating.

v2: drop gfx_v9_0_cp_compute_fini() as well

Signed-off-by: Alex Deucher 


Reviewed-by: Christian König  for the whole 
series.



---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 321 +++---
  1 file changed, 24 insertions(+), 297 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad82ab7..09a3710 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1094,23 +1094,21 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
  
-	if (amdgpu_sriov_vf(adev)) {

-   r = gfx_v9_0_kiq_init(adev);
-   if (r) {
-   DRM_ERROR("Failed to init KIQ BOs!\n");
-   return r;
-   }
+   r = gfx_v9_0_kiq_init(adev);
+   if (r) {
+   DRM_ERROR("Failed to init KIQ BOs!\n");
+   return r;
+   }
  
-		kiq = &adev->gfx.kiq;

-   r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-   if (r)
-   return r;
+   kiq = &adev->gfx.kiq;
+   r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+   if (r)
+   return r;
  
-		/* create MQD for all compute queues as wel as KIQ for SRIOV case */

-   r = gfx_v9_0_compute_mqd_soft_init(adev);
-   if (r)
-   return r;
-   }
+   /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+   r = gfx_v9_0_compute_mqd_soft_init(adev);
+   if (r)
+   return r;
  
  	/* reserve GDS, GWS and OA resource for gfx */

r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -1157,11 +1155,9 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
  
-	if (amdgpu_sriov_vf(adev)) {

-   gfx_v9_0_compute_mqd_soft_fini(adev);
-   gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-   gfx_v9_0_kiq_fini(adev);
-   }
+   gfx_v9_0_compute_mqd_soft_fini(adev);
+   gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+   gfx_v9_0_kiq_fini(adev);
  
  	gfx_v9_0_mec_fini(adev);

gfx_v9_0_ngg_fini(adev);
@@ -1732,13 +1728,6 @@ static void gfx_v9_0_cp_compute_enable(struct 
amdgpu_device *adev, bool enable)
udelay(50);
  }
  
-static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)

-{
-   gfx_v9_0_cp_compute_enable(adev, true);
-
-   return 0;
-}
-
  static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
  {
const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -1781,45 +1770,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct 
amdgpu_device *adev)
return 0;
  }
  
-static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)

-{
-   int i, r;
-
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-   if (ring->mqd_obj) {
-   r = amdgpu_bo_reserve(ring->mqd_obj, false);
-   if (unlikely(r != 0))
-   dev_warn(adev->dev, "(%d) reserve MQD bo 
failed\n", r);
-
-   amdgpu_bo_unpin(ring->mqd_obj);
-   amdgpu_bo_unreserve(ring->mqd_obj);
-
-   amdgpu_bo_unref(&ring->mqd_obj);
-   ring->mqd_obj = NULL;
-   }
-   }
-}
-
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
-
-static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-   int i, r;
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-   if (gfx_v9_0_init_queue(ring))
-   dev_warn(adev->dev, "compute queue %d init failed!\n", 
i);
-   }
-
-   r = gfx_v9_0_cp_compute_start(adev);
-   if (r)
-   return r;
-
-   return 0;
-}
-
  /* KIQ functions */
  static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
  {
@@ -2198,7 +2148,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
if (r)
-   return r;
+   return r;
} else {
return

Re: [PATCH] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Christian König

Am 23.03.2017 um 18:22 schrieb Nicolai Hähnle:

From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.


First of all please split adding the fence parameter to 
amdgpu_vm_clear_freed() into a separate patch.


Not a must have, but that should make it easier to review.



Signed-off-by: Nicolai Hähnle 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 20 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct 
amdgpu_cs_parser *p)
int i, r;
  
  	r = amdgpu_vm_update_page_directory(adev, vm);

if (r)
return r;
  
  	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);

if (r)
return r;
  
-	r = amdgpu_vm_clear_freed(adev, vm);

+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
  
  	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);

if (r)
return r;
  
  	r = amdgpu_sync_fence(adev, &p->job->sync,

  fpriv->prt_va->last_pt_update);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..bd2daef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
  
  	struct amdgpu_bo_list_entry vm_pd;

struct list_head list, duplicates;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct fence *fence = NULL;
int r;
  
  	INIT_LIST_HEAD(&list);

INIT_LIST_HEAD(&duplicates);
  
  	tv.bo = &bo->tbo;

tv.shared = true;
list_add(&tv.head, &list);
  
  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);

@@ -166,23 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+   amdgpu_vm_clear_freed(adev, vm, &fence);
}
}
-   ttm_eu_backoff_reservation(&ticket, &list);
+
+   if (fence) {
+   ttm_eu_fence_buffer_objects(&ticket, &list, fence);
+   fence_put(fence);
+   } else {
+   ttm_eu_backoff_reservation(&ticket, &list);
+   }


Ah, now I remember why we didn't do that before. We could run into 
problems because amdgpu_gem_object_close() can't fail and adding this 
needs memory.


Anyway, "tv.shared = true;" was there before. So your patch doesn't make 
it any worse.


But I suggest to use amdgpu_bo_fence() instead of 
ttm_eu_fence_buffer_objects(). This way we don't try to add the fence to 
the page directory.
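
Something along these lines would do it (untested, just to sketch the idea; the
error handling for amdgpu_vm_clear_freed() is omitted):

	bo_va = amdgpu_vm_bo_find(vm, bo);
	if (bo_va && --bo_va->ref_count == 0) {
		amdgpu_vm_bo_rmv(adev, bo_va);
		amdgpu_vm_clear_freed(adev, vm, &fence);
	}

	if (fence) {
		/* fence only this BO while it is still reserved, instead of
		 * every BO on the validation list, so the fence doesn't end
		 * up on the page directory as well */
		amdgpu_bo_fence(bo, fence, true);
		fence_put(fence);
	}
	ttm_eu_backoff_reservation(&ticket, &list);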


Apart from that the patch looks good to me,
Christian.


  }
  
  static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)

  {
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)
r = -EAGAIN;
}
return r;
@@ -535,21 +544,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
  
  	r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,

  NULL);
if (r)
goto error;
  
  	r = amdgpu_vm_update_page_directory(adev, vm);

if (r)
goto error;
  
-	r = amdgpu_vm_clear_freed(adev, vm);

+ 

[PATCH] drm/amdgpu: clear freed mappings immediately when BO may be freed

2017-03-23 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Also, add the fence of the clear operations to the BO to ensure that
the underlying memory can only be re-used after all PTEs pointing to
it have been cleared.

This avoids the following sequence of events that could be triggered
by user space:

1. Submit a CS that accesses some BO _without_ adding that BO to the
   buffer list.
2. Free that BO.
3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is
   now already in use by somebody else.

By clearing the page tables immediately in step 2, a GPU VM fault will
be triggered in step 4 instead of wild memory accesses.

Signed-off-by: Nicolai Hähnle 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 20 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55d553a..85e6070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -778,21 +778,21 @@ static int amdgpu_bo_vm_update_pte(struct 
amdgpu_cs_parser *p)
int i, r;
 
r = amdgpu_vm_update_page_directory(adev, vm);
if (r)
return r;
 
r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
if (r)
return r;
 
-   r = amdgpu_vm_clear_freed(adev, vm);
+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
 
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
 
r = amdgpu_sync_fence(adev, &p->job->sync,
  fpriv->prt_va->last_pt_update);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index be9fb2c..bd2daef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -145,20 +145,21 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
struct amdgpu_bo_list_entry vm_pd;
struct list_head list, duplicates;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct fence *fence = NULL;
int r;
 
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
 
tv.bo = &bo->tbo;
tv.shared = true;
list_add(&tv.head, &list);
 
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -166,23 +167,31 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+   amdgpu_vm_clear_freed(adev, vm, &fence);
}
}
-   ttm_eu_backoff_reservation(&ticket, &list);
+
+   if (fence) {
+   ttm_eu_fence_buffer_objects(&ticket, &list, fence);
+   fence_put(fence);
+   } else {
+   ttm_eu_backoff_reservation(&ticket, &list);
+   }
 }
 
 static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
 {
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)
r = -EAGAIN;
}
return r;
@@ -535,21 +544,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
 
r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
  NULL);
if (r)
goto error;
 
r = amdgpu_vm_update_page_directory(adev, vm);
if (r)
goto error;
 
-   r = amdgpu_vm_clear_freed(adev, vm);
+   r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
 
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
 
 error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd7df45..b6029ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1397,48 +1397,56 @@ static void amdgpu_vm_prt_fini(struct amdgpu_devic

[PATCH 2/9] drm/amdgpu/gfx9: whitespace cleanup

2017-03-23 Thread Alex Deucher
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 09a3710..2fa053d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -654,8 +654,8 @@ static int gfx_v9_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
ring = &adev->gfx.kiq.ring;
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), 
PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, (void 
**)&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, (void 
**)&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob 
(%d)", r);
return r;
@@ -665,13 +665,12 @@ static int gfx_v9_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
}
 
/* create MQD for each KCQ */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++)
-   {
+   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct 
v9_mqd), PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, 
(void **)&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, (void 
**)&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd 
ob (%d)", r);
return r;
-- 
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 9/9] drm/amdgpu/gfx9: further KIQ parameter cleanup

2017-03-23 Thread Alex Deucher
The ring structure already has what we need.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 00bc107..cf20bb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1838,10 +1838,10 @@ static void gfx_v9_0_map_queue_enable(struct 
amdgpu_ring *kiq_ring,
udelay(50);
 }
 
-static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring,
-struct v9_mqd *mqd)
+static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
+   struct v9_mqd *mqd = ring->mqd_ptr;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
 
@@ -1964,10 +1964,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring,
return 0;
 }
 
-static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring,
- struct v9_mqd *mqd)
+static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
+   struct v9_mqd *mqd = ring->mqd_ptr;
uint32_t tmp;
int j;
 
@@ -2075,11 +2075,11 @@ static int gfx_v9_0_kiq_init_register(struct 
amdgpu_ring *ring,
return 0;
 }
 
-static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring,
-  struct v9_mqd *mqd)
+static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+   struct v9_mqd *mqd = ring->mqd_ptr;
bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
@@ -2093,9 +2093,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v9_0_mqd_init(ring, mqd);
+   gfx_v9_0_mqd_init(ring);
if (is_kiq)
-   gfx_v9_0_kiq_init_register(ring, mqd);
+   gfx_v9_0_kiq_init_register(ring);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
@@ -2108,7 +2108,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
if (is_kiq) {
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 
0);
-   gfx_v9_0_kiq_init_register(ring, mqd);
+   gfx_v9_0_kiq_init_register(ring);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -2137,7 +2137,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
-   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr);
+   r = gfx_v9_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
@@ -2153,7 +2153,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
goto done;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
-   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr);
+   r = gfx_v9_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
-- 
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/9] drm/amdgpu/gfx9: rename some functions

2017-03-23 Thread Alex Deucher
To better match where they are used.  Called from sw_init
and sw_fini.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2fa053d..126a012 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -645,7 +645,7 @@ static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
 }
 
 /* create MQD for each compute queue */
-static int gfx_v9_0_compute_mqd_soft_init(struct amdgpu_device *adev)
+static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int r, i;
@@ -683,7 +683,7 @@ static int gfx_v9_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
return 0;
 }
 
-static void gfx_v9_0_compute_mqd_soft_fini(struct amdgpu_device *adev)
+static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int i;
@@ -1105,7 +1105,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
 
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-   r = gfx_v9_0_compute_mqd_soft_init(adev);
+   r = gfx_v9_0_compute_mqd_sw_init(adev);
if (r)
return r;
 
@@ -1154,7 +1154,7 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-   gfx_v9_0_compute_mqd_soft_fini(adev);
+   gfx_v9_0_compute_mqd_sw_fini(adev);
gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
gfx_v9_0_kiq_fini(adev);
 
-- 
2.5.5



[PATCH 5/9] drm/amdgpu/gfx9: reserve kiq eop object before unmapping it

2017-03-23 Thread Alex Deucher
It's required.
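
For context, a minimal sketch (not part of the patch) of the reserve/kmap/kunmap/unreserve
pattern this series converges on; the function name and the memset are only illustrative:

static int example_clear_kiq_eop(struct amdgpu_bo *bo, unsigned size)
{
	void *ptr;
	int r;

	r = amdgpu_bo_reserve(bo, false);	/* CPU mapping needs the reservation */
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(bo, &ptr);
	if (!r) {
		memset(ptr, 0, size);
		amdgpu_bo_kunmap(bo);		/* unmap under the same reservation */
	}

	amdgpu_bo_unreserve(bo);
	return r;
}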

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b899e80..94289de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -599,7 +599,11 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
 
memset(hpd, 0, MEC_HPD_SIZE);
 
+   r = amdgpu_bo_reserve(kiq->eop_obj, false);
+   if (unlikely(r != 0))
+   dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
amdgpu_bo_kunmap(kiq->eop_obj);
+   amdgpu_bo_unreserve(kiq->eop_obj);
 
return 0;
 }
-- 
2.5.5



[PATCH 1/9] drm/amdgpu/gfx9: Switch baremetal to use KIQ for compute ring management. (v2)

2017-03-23 Thread Alex Deucher
KIQ is the Kernel Interface Queue for managing the MEC.  Rather than setting
up rings via direct MMIO of ring registers, the rings are configured via
special packets sent to the KIQ.  This allows the MEC to better manage shared
resources and certain power events. It also reduces the number of code paths the
driver has to support and is required for MEC powergating.
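
As a rough illustration of the difference (a sketch only, not code from this patch;
it assumes the PACKET3_MAP_QUEUES opcode and the usual amdgpu ring helpers, and the
packet payload dwords marked as placeholders are not the real encoding):

/* illustration: direct MMIO queue setup vs. handing the MQD to the MEC */
static void example_map_compute_queue(struct amdgpu_device *adev,
				      struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	u64 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

	/* old style (simplified): program the hardware queue descriptor over MMIO */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
	       lower_32_bits(ring->gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
	       upper_32_bits(ring->gpu_addr >> 8));

	/* new style: send a MAP_QUEUES packet on the KIQ ring instead */
	amdgpu_ring_alloc(kiq_ring, 8);
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, 0);	/* placeholder: queue select/policy bits */
	amdgpu_ring_write(kiq_ring, 0);	/* placeholder: doorbell/pipe/queue bits */
	amdgpu_ring_write(kiq_ring, lower_32_bits(ring->mqd_gpu_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(ring->mqd_gpu_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
}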

v2: drop gfx_v9_0_cp_compute_fini() as well

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 321 +++---
 1 file changed, 24 insertions(+), 297 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad82ab7..09a3710 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1094,23 +1094,21 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
 
-   if (amdgpu_sriov_vf(adev)) {
-   r = gfx_v9_0_kiq_init(adev);
-   if (r) {
-   DRM_ERROR("Failed to init KIQ BOs!\n");
-   return r;
-   }
+   r = gfx_v9_0_kiq_init(adev);
+   if (r) {
+   DRM_ERROR("Failed to init KIQ BOs!\n");
+   return r;
+   }
 
-   kiq = &adev->gfx.kiq;
-   r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-   if (r)
-   return r;
+   kiq = &adev->gfx.kiq;
+   r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+   if (r)
+   return r;
 
-   /* create MQD for all compute queues as wel as KIQ for SRIOV 
case */
-   r = gfx_v9_0_compute_mqd_soft_init(adev);
-   if (r)
-   return r;
-   }
+   /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+   r = gfx_v9_0_compute_mqd_soft_init(adev);
+   if (r)
+   return r;
 
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -1157,11 +1155,9 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-   if (amdgpu_sriov_vf(adev)) {
-   gfx_v9_0_compute_mqd_soft_fini(adev);
-   gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-   gfx_v9_0_kiq_fini(adev);
-   }
+   gfx_v9_0_compute_mqd_soft_fini(adev);
+   gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+   gfx_v9_0_kiq_fini(adev);
 
gfx_v9_0_mec_fini(adev);
gfx_v9_0_ngg_fini(adev);
@@ -1732,13 +1728,6 @@ static void gfx_v9_0_cp_compute_enable(struct 
amdgpu_device *adev, bool enable)
udelay(50);
 }
 
-static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
-{
-   gfx_v9_0_cp_compute_enable(adev, true);
-
-   return 0;
-}
-
 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 {
const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -1781,45 +1770,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct 
amdgpu_device *adev)
return 0;
 }
 
-static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
-{
-   int i, r;
-
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-   if (ring->mqd_obj) {
-   r = amdgpu_bo_reserve(ring->mqd_obj, false);
-   if (unlikely(r != 0))
-   dev_warn(adev->dev, "(%d) reserve MQD bo 
failed\n", r);
-
-   amdgpu_bo_unpin(ring->mqd_obj);
-   amdgpu_bo_unreserve(ring->mqd_obj);
-
-   amdgpu_bo_unref(&ring->mqd_obj);
-   ring->mqd_obj = NULL;
-   }
-   }
-}
-
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
-
-static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-   int i, r;
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-   if (gfx_v9_0_init_queue(ring))
-   dev_warn(adev->dev, "compute queue %d init failed!\n", 
i);
-   }
-
-   r = gfx_v9_0_cp_compute_start(adev);
-   if (r)
-   return r;
-
-   return 0;
-}
-
 /* KIQ functions */
 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
@@ -2198,7 +2148,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
if (r)
-   return r;
+   return r;
} else {
return r;
}
@@ -2209,7 +2159,7 @@ static int gfx_v9_0_kiq_resume(struct

[PATCH 8/9] drm/amdgpu/gfx8: further KIQ parameter cleanup

2017-03-23 Thread Alex Deucher
The ring structure already has what we need.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a023f27..20d10b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4667,10 +4667,10 @@ static void gfx_v8_0_map_queue_enable(struct 
amdgpu_ring *kiq_ring,
udelay(50);
 }
 
-static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
-struct vi_mqd *mqd)
+static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
+   struct vi_mqd *mqd = ring->mqd_ptr;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
 
@@ -4785,10 +4785,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
return 0;
 }
 
-static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring,
- struct vi_mqd *mqd)
+static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
+   struct vi_mqd *mqd = ring->mqd_ptr;
uint32_t tmp;
int j;
 
@@ -4876,11 +4876,11 @@ static int gfx_v8_0_kiq_init_register(struct 
amdgpu_ring *ring,
return 0;
 }
 
-static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
-  struct vi_mqd *mqd)
+static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+   struct vi_mqd *mqd = ring->mqd_ptr;
bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
@@ -4894,9 +4894,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v8_0_mqd_init(ring, mqd);
+   gfx_v8_0_mqd_init(ring);
if (is_kiq)
-   gfx_v8_0_kiq_init_register(ring, mqd);
+   gfx_v8_0_kiq_init_register(ring);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
@@ -4914,7 +4914,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
if (is_kiq) {
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v8_0_kiq_init_register(ring, mqd);
+   gfx_v8_0_kiq_init_register(ring);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -4943,8 +4943,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
if (!r) {
-   r = gfx_v8_0_kiq_init_queue(ring,
-   (struct vi_mqd *)ring->mqd_ptr);
+   r = gfx_v8_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
@@ -4967,8 +4966,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
goto done;
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
if (!r) {
-   r = gfx_v8_0_kiq_init_queue(ring,
-   (struct vi_mqd 
*)ring->mqd_ptr);
+   r = gfx_v8_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
-- 
2.5.5

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 7/9] drm/amdgpu/gfx9: store the eop gpu addr in the ring structure

2017-03-23 Thread Alex Deucher
Avoids passing around additional parameters during setup.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c0efb3..00bc107 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -612,6 +612,7 @@ static int gfx_v9_0_kiq_init_ring(struct amdgpu_device 
*adev,
  struct amdgpu_ring *ring,
  struct amdgpu_irq_src *irq)
 {
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
int r = 0;
 
r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
@@ -632,6 +633,7 @@ static int gfx_v9_0_kiq_init_ring(struct amdgpu_device 
*adev,
 
irq->data = ring;
ring->queue = 0;
+   ring->eop_gpu_addr = kiq->eop_gpu_addr;
sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024,
 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
@@ -1088,6 +1090,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->me = 1; /* first MEC */
ring->pipe = i / 8;
ring->queue = i % 8;
+   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * 
MEC_HPD_SIZE);
sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, 
ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
@@ -1836,8 +1839,7 @@ static void gfx_v9_0_map_queue_enable(struct amdgpu_ring 
*kiq_ring,
 }
 
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring,
-struct v9_mqd *mqd,
-uint64_t eop_gpu_addr)
+struct v9_mqd *mqd)
 {
struct amdgpu_device *adev = ring->adev;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
@@ -1851,7 +1853,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring,
mqd->compute_static_thread_mgmt_se3 = 0x;
mqd->compute_misc_reserved = 0x0003;
 
-   eop_base_addr = eop_gpu_addr >> 8;
+   eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
@@ -2078,16 +2080,12 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-   uint64_t eop_gpu_addr;
bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
if (is_kiq) {
-   eop_gpu_addr = kiq->eop_gpu_addr;
gfx_v9_0_kiq_setting(&kiq->ring);
} else {
-   eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
-   ring->queue * MEC_HPD_SIZE;
mqd_idx = ring - &adev->gfx.compute_ring[0];
}
 
@@ -2095,7 +2093,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v9_0_mqd_init(ring, mqd, eop_gpu_addr);
+   gfx_v9_0_mqd_init(ring, mqd);
if (is_kiq)
gfx_v9_0_kiq_init_register(ring, mqd);
soc15_grbm_select(adev, 0, 0, 0, 0);
-- 
2.5.5



[PATCH 6/9] drm/amdgpu/gfx9: reduce the function params for mqd setup

2017-03-23 Thread Alex Deucher
Everything we need is in the ring structure.  No need to
pass all the bits explicitly.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 94289de..1c0efb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1835,12 +1835,11 @@ static void gfx_v9_0_map_queue_enable(struct 
amdgpu_ring *kiq_ring,
udelay(50);
 }
 
-static int gfx_v9_0_mqd_init(struct amdgpu_device *adev,
+static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring,
 struct v9_mqd *mqd,
-uint64_t mqd_gpu_addr,
-uint64_t eop_gpu_addr,
-struct amdgpu_ring *ring)
+uint64_t eop_gpu_addr)
 {
+   struct amdgpu_device *adev = ring->adev;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
 
@@ -1890,8 +1889,8 @@ static int gfx_v9_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_pq_wptr_hi = 0;
 
/* set the pointer to the MQD */
-   mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffc;
-   mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+   mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffc;
+   mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
/* set MQD vmid to 0 */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
@@ -1963,10 +1962,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_device *adev,
return 0;
 }
 
-static int gfx_v9_0_kiq_init_register(struct amdgpu_device *adev,
- struct v9_mqd *mqd,
- struct amdgpu_ring *ring)
+static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring,
+ struct v9_mqd *mqd)
 {
+   struct amdgpu_device *adev = ring->adev;
uint32_t tmp;
int j;
 
@@ -2075,8 +2074,7 @@ static int gfx_v9_0_kiq_init_register(struct 
amdgpu_device *adev,
 }
 
 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring,
-  struct v9_mqd *mqd,
-  u64 mqd_gpu_addr)
+  struct v9_mqd *mqd)
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -2097,9 +2095,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v9_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
+   gfx_v9_0_mqd_init(ring, mqd, eop_gpu_addr);
if (is_kiq)
-   gfx_v9_0_kiq_init_register(adev, mqd, ring);
+   gfx_v9_0_kiq_init_register(ring, mqd);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
@@ -2112,7 +2110,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring 
*ring,
if (is_kiq) {
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 
0);
-   gfx_v9_0_kiq_init_register(adev, mqd, ring);
+   gfx_v9_0_kiq_init_register(ring, mqd);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -2141,7 +2139,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
-   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr, 
ring->mqd_gpu_addr);
+   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
@@ -2157,7 +2155,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
goto done;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
-   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr, 
ring->mqd_gpu_addr);
+   r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
-- 
2.5.5



[PATCH 4/9] drm/amdgpu/gfx9: reserve mqd objects before mapping them

2017-03-23 Thread Alex Deucher
It's required.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 126a012..b899e80 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2130,30 +2130,40 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device 
*adev)
gfx_v9_0_cp_compute_enable(adev, true);
 
ring = &adev->gfx.kiq.ring;
-   if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) {
+
+   r = amdgpu_bo_reserve(ring->mqd_obj, false);
+   if (unlikely(r != 0))
+   goto done;
+
+   r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+   if (!r) {
r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr, 
ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
-   if (r)
-   return r;
-   } else {
-   return r;
}
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   if (r)
+   goto done;
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
-   if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) {
+
+   r = amdgpu_bo_reserve(ring->mqd_obj, false);
+   if (unlikely(r != 0))
+   goto done;
+   r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+   if (!r) {
r = gfx_v9_0_kiq_init_queue(ring, ring->mqd_ptr, 
ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
-   if (r)
-   return r;
-   } else {
-   return r;
}
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   if (r)
+   goto done;
}
 
-   return 0;
+done:
+   return r;
 }
 
 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
-- 
2.5.5



Re: [PATCH] Revert "drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first"

2017-03-23 Thread Zachary Michaels
>
> No, I've requested reverting the patch for now because it causes an
> obvious and rather severe problem. If you guys can quickly find how to
> fix it, feel free to use that instead.

My mistake! That makes sense. Thanks again.


Re: [PATCH 1/8] drm/amdgpu/gfx8: whitespace cleanup

2017-03-23 Thread Christian König

Am 23.03.2017 um 16:55 schrieb Alex Deucher:

Signed-off-by: Alex Deucher 


Reviewed-by: Christian König  for the whole set.


---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 +--
  1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0ff776e..cc4945f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7085,8 +7085,8 @@ static int gfx_v8_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
ring = &adev->gfx.kiq.ring;
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), 
PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob 
(%d)", r);
return r;
@@ -7099,13 +7099,12 @@ static int gfx_v8_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
}
  
  	/* create MQD for each KCQ */

-   for (i = 0; i < adev->gfx.num_compute_rings; i++)
-   {
+   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct 
vi_mqd), PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob 
(%d)", r);
return r;





Re: [PATCH] Revert "drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first"

2017-03-23 Thread Christian König
Understood -- I thought you might not want to take this patch, but I 
went ahead and sent it out because Christian requested it, and it 
seems like he doesn't think VRAM bos should ever evict back to VRAM at 
all?
No, I've requested reverting the patch for now because it causes an 
obvious and rather severe problem. If you guys can quickly find how to 
fix it, feel free to use that instead.


Is my understanding of the original commit correct in that it tries to 
rewrite the eviction placements of CPU accessible bos so that they are 
either size zero (fpfn and lpfn = start of inaccessible VRAM) or they 
are in inaccessible VRAM (fpfn = start of inaccessible VRAM and lpfn = 0)?

That for example could work as well, but see below.

if these sorts of evictions are desirable, would it make more sense to 
treat CPU inaccessible/accessible VRAM as distinct entities with their 
own lrus?
Actually I'm pretty sure that it isn't desirable. See, the evict function 
doesn't know whether we are trying to evict BOs because we need CPU accessible 
VRAM or because we have simply run out of VRAM.


This code only makes sense when we need to move different BOs into the 
CPU accessible part in a round-robin fashion because they are accessed by the 
CPU, but in that case it is actually better to move them to GTT sooner or later.


Regards,
Christian.

Am 23.03.2017 um 16:31 schrieb Zachary Michaels:


Was userspace maybe performing concurrent CPU access to the BOs in
question?


As far as I know Julien has demonstrated that this is not the case.

I hope we can find a better solution.


Understood -- I thought you might not want to take this patch, but I 
went ahead and sent it out because Christian requested it, and it 
seems like he doesn't think VRAM bos should ever evict back to VRAM at 
all?


Is my understanding of the original commit correct in that it tries to 
rewrite the eviction placements of CPU accessible bos so that they are 
either size zero (fpfn and lpfn = start of inaccessible VRAM) or they 
are in inaccessible VRAM (fpfn = start of inaccessible VRAM and lpfn = 0)?


In this case, to me it seems that the simplest fix would be to iterate 
using i to rewrite all the VRAM placements instead of just the first 
one (rbo->placements[i] instead of rbo->placements[0]). In the case 
where RADEON_GEM_NO_CPU_ACCESS is set, the second placement will be in 
CPU accessible VRAM, and that doesn't seem correct to me as there is 
no longer any sort of ordering for evictions. (Unfortunately I'm not 
currently in a position to test whether this fixes our issue.) Sorry, 
I meant to make a note of this originally.


Also, I don't claim to understand this code well enough, but I wonder: 
if these sorts of evictions are desirable, would it make more sense to 
treat CPU inaccessible/accessible VRAM as distinct entities with their 
own lrus?


I should also note that we are experiencing another issue where the 
kernel locks up in similar circumstances. As Julien noted, we get no 
output, and the watchdogs don't seem to work. It may be the case that 
Xorg and our process are calling ttm_bo_mem_force_space concurrently, 
but I don't think we have enough information yet to say for 
sure. Reverting this commit does not fix that issue. I have some small 
amount of evidence indicating that bos flagged for CPU access are 
getting placed in CPU inaccessible memory. Could that cause this sort 
of kernel lockup?


Thanks for your help.







Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access

2017-03-23 Thread Sagalovitch, Serguei
Christian,

- Are we going to support resizing BAR when kernel 
modesetting  is not enabled and we are running in console 
under VBIOS control (VESA/VGA)? 

- Should we restore PCI configuration if amdgpu
will be unloaded?

- In function amdgpu_resize_bar0():
  If resizing to the "max" size fails, should we try other 
sizes? What do you think?


Sincerely yours,
Serguei Sagalovitch


From: amd-gfx  on behalf of Zhang, Jerry 

Sent: March 15, 2017 10:41 PM
To: Alex Deucher
Cc: Zhou, David(ChunMing); Ayyappa Ch; linux-...@vger.kernel.org; 
linux-ker...@vger.kernel.org; dri-de...@lists.freedesktop.org; 
platform-driver-...@vger.kernel.org; Christian König; helg...@kernel.org; 
amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access
    
Thanks for your info.
I see.

Regards,
Jerry (Junwei Zhang)

Linux Base Graphics
SRDC Software Development
_


> -Original Message-
> From: Alex Deucher [mailto:alexdeuc...@gmail.com]
> Sent: Thursday, March 16, 2017 10:25
> To: Zhang, Jerry
> Cc: Christian König; Zhou, David(ChunMing); Ayyappa Ch; linux-
> p...@vger.kernel.org; linux-ker...@vger.kernel.org; dri-
> de...@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
> helg...@kernel.org; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access
> 
> On Wed, Mar 15, 2017 at 10:19 PM, Zhang, Jerry  wrote:
> >> -Original Message-
> >> From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On
> >> Behalf Of Christian K?nig
> >> Sent: Wednesday, March 15, 2017 17:29
> >> To: Zhou, David(ChunMing); Ayyappa Ch
> >> Cc: linux-...@vger.kernel.org; linux-ker...@vger.kernel.org; amd-
> >> g...@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
> >> helg...@kernel.org; dri-de...@lists.freedesktop.org
> >> Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access
> >>
> >> Yes, exactly that.
> >
> > (I'm not familiar with PCI too much.)
> > Is there any restrict for PCI device?
> > I'm concerning if any PCI couldn't support it on some motherboard.
> 
> It depends on the PCI root bridge.  This patch set only implements support for
> AMD root bridges.  Intel and other vendors would need similar code.
> 
> Alex
> 
> >
> >>
> >> Christian.
> >>
> >> Am 15.03.2017 um 09:25 schrieb Zhou, David(ChunMing):
> >> > Does that means we don't need invisible vram later?
> >> >
> >> > David
> >> >
> >> > -Original Message-
> >> > From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On
> >> > Behalf Of Christian K?nig
> >> > Sent: Wednesday, March 15, 2017 3:38 PM
> >> > To: Ayyappa Ch 
> >> > Cc: linux-...@vger.kernel.org; linux-ker...@vger.kernel.org;
> >> > amd-gfx@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
> >> > helg...@kernel.org; dri-de...@lists.freedesktop.org
> >> > Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access
> >> >
> >> > Carrizo is an APU and resizing BARs isn't needed nor supported there.
> >> > The CPU can access the full stolen VRAM directly on that hardware.
> >> >
> >> > As far as I know ASICs with support for this are Tonga, Fiji and all 
> >> > Polaris
> variants.
> >> >
> >> > Christian.
> >> >
> >> > Am 15.03.2017 um 08:23 schrieb Ayyappa Ch:
> >> >> Is it possible on Carrizo asics? Or only supports on newer asics?
> >> >>
> >> >> On Mon, Mar 13, 2017 at 6:11 PM, Christian König
> >> >>  wrote:
> >> >>> From: Christian König 
> >> >>>
> >> >>> Try to resize BAR0 to let CPU access all of VRAM.
> >> >>>
> >> >>> Signed-off-by: Christian König 
> >> >>> ---
> >> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
> >> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29
> >> +
> >> >>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  8 +---
> >> >>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  8 +---
> >> >>>    4 files changed, 40 insertions(+), 6 deletions(-)
> >> >>>
> >> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> >>> index 3b81ded..905ded9 100644
> >> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> >>> @@ -1719,6 +1719,7 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct
> >> amdgpu_device *adev, struct ttm_tt *ttm,
> >> >>>    struct ttm_mem_reg *mem);
> >> >>>    void amdgpu_vram_location(struct amdgpu_device *adev, struct
> >> amdgpu_mc *mc, u64 base);
> >> >>>    void amdgpu_gtt_location(struct amdgpu_device *adev, struct
> >> >>> amdgpu_mc *mc);
> >> >>> +void amdgpu_resize_bar0(struct amdgpu_device *adev);
> >> >>>    void amdgpu_ttm_set_active_vram_size(struct amdgpu_device
> >> >>> *adev,
> >> u64 size);
> >> >>>    int amdgpu_ttm_init(struct amdgpu_device *adev);
> >> >>>    void amdgpu_ttm_fini(struct amdgpu_device *adev); diff --git
> >> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> >>> b/drivers/gpu

Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access

2017-03-23 Thread Christian König

- Are we going to support resizing BAR when kernel
modesetting  is not enabled and we are running in console
under VBIOS control (VESA/VGA)?
No, initially I tried to resize the PCI BAR during probing without the 
help of the driver at all. But the VESA/EFI/VBIOS don't seem to be able 
to handle addresses above 4GB for some reason.


So the approach is to let the driver kick the VESA/EFI drivers out and 
then resize when we know that nobody is accessing the BAR.


That's the only approach I've found without either blacklisting VESA/EFI 
drivers or crashing the system during the resize.



- Should we restore PCI configuration if amdgpu
will be unloaded?

Yeah, I thought about that as well. I'm just not sure how to do it.

There is a lot of stuff we need to save and reset when the driver 
unloads for not much gain.



- In function amdgpu_resize_bar0():
   If resizing for "max" size failed should we try other
sizes? What do you think?
Probably not worth it. If we get the BAR moved to a 64bit address we 
should have enough address space in almost all cases, so setting it to 
the maximum should succeed.


But I think we could add another parameter to allow limiting the resized 
BAR size, both for the corner cases and for testing.
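
Purely as an illustration of that idea (the parameter name and the plumbing below
are hypothetical, not something from the posted series):

/* hypothetical module parameter: cap the resized BAR0 size, in MB (0 = max) */
static int amdgpu_bar_size_mb;
module_param_named(bar_size_mb, amdgpu_bar_size_mb, int, 0444);
MODULE_PARM_DESC(bar_size_mb, "Limit resizable BAR0 size in MB (0 = maximum)");

/* called from amdgpu_resize_bar0(): clamp the requested size before resizing */
static u64 amdgpu_wanted_bar_size(struct amdgpu_device *adev)
{
	u64 size = adev->mc.real_vram_size;

	if (amdgpu_bar_size_mb)
		size = min_t(u64, size, (u64)amdgpu_bar_size_mb << 20);
	return size;
}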


Regards,
Christian.

Am 23.03.2017 um 15:30 schrieb Sagalovitch, Serguei:

Christian,

- Are we going to support resizing BAR when kernel
modesetting  is not enabled and we are running in console
under VBIOS control (VESA/VGA)?

- Should we restore PCI configuration if amdgpu
will be unloaded?

- In function amdgpu_resize_bar0():
   If resizing for "max" size failed should we try other
sizes? What do you think?


Sincerely yours,
Serguei Sagalovitch


From: amd-gfx  on behalf of Zhang, Jerry 

Sent: March 15, 2017 10:41 PM
To: Alex Deucher
Cc: Zhou, David(ChunMing); Ayyappa Ch; linux-...@vger.kernel.org; 
linux-ker...@vger.kernel.org; dri-de...@lists.freedesktop.org; 
platform-driver-...@vger.kernel.org; Christian König; helg...@kernel.org; 
amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access
 
Thanks for your info.

I see.

Regards,
Jerry (Junwei Zhang)

Linux Base Graphics
SRDC Software Development
_



-Original Message-
From: Alex Deucher [mailto:alexdeuc...@gmail.com]
Sent: Thursday, March 16, 2017 10:25
To: Zhang, Jerry
Cc: Christian König; Zhou, David(ChunMing); Ayyappa Ch; linux-
p...@vger.kernel.org; linux-ker...@vger.kernel.org; dri-
de...@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
helg...@kernel.org; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access

On Wed, Mar 15, 2017 at 10:19 PM, Zhang, Jerry  wrote:

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On
Behalf Of Christian K?nig
Sent: Wednesday, March 15, 2017 17:29
To: Zhou, David(ChunMing); Ayyappa Ch
Cc: linux-...@vger.kernel.org; linux-ker...@vger.kernel.org; amd-
g...@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
helg...@kernel.org; dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access

Yes, exactly that.

(I'm not familiar with PCI too much.)
Is there any restrict for PCI device?
I'm concerning if any PCI couldn't support it on some motherboard.

It depends on the PCI root bridge.  This patch set only implements support for
AMD root bridges.  Intel and other vendors would need similar code.

Alex


Christian.

Am 15.03.2017 um 09:25 schrieb Zhou, David(ChunMing):

Does that means we don't need invisible vram later?

David

-Original Message-
From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On
Behalf Of Christian K?nig
Sent: Wednesday, March 15, 2017 3:38 PM
To: Ayyappa Ch 
Cc: linux-...@vger.kernel.org; linux-ker...@vger.kernel.org;
amd-gfx@lists.freedesktop.org; platform-driver-...@vger.kernel.org;
helg...@kernel.org; dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu: resize VRAM BAR for CPU access

Carrizo is an APU and resizing BARs isn't needed nor supported there.
The CPU can access the full stolen VRAM directly on that hardware.

As far as I know ASICs with support for this are Tonga, Fiji and all Polaris

variants.

Christian.

Am 15.03.2017 um 08:23 schrieb Ayyappa Ch:

Is it possible on Carrizo asics? Or only supports on newer asics?

On Mon, Mar 13, 2017 at 6:11 PM, Christian König
 wrote:

From: Christian König 

Try to resize BAR0 to let CPU access all of VRAM.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29

+

 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  8 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  8 +---
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3b81ded..90

[PATCH 4/8] drm/amdgpu/gfx8: test KIQ before compute rings

2017-03-23 Thread Alex Deucher
If KIQ isn't working, the compute rings won't work either.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index efe03a5..ecfbbd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4953,6 +4953,13 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device 
*adev)
if (r)
goto done;
 
+   ring->ready = true;
+   r = amdgpu_ring_test_ring(ring);
+   if (r) {
+   ring->ready = false;
+   goto done;
+   }
+
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
 
@@ -4981,12 +4988,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device 
*adev)
ring->ready = false;
}
 
-   ring = &adev->gfx.kiq.ring;
-   ring->ready = true;
-   r = amdgpu_ring_test_ring(ring);
-   if (r)
-   ring->ready = false;
-
 done:
return r;
 }
-- 
2.5.5



[PATCH 8/8] drm/amdgpu/gfx8: store the eop gpu addr in the ring structure

2017-03-23 Thread Alex Deucher
Avoids passing around additional parameters during setup.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 14 ++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 9f57eda..853e87a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -168,6 +168,7 @@ struct amdgpu_ring {
struct amdgpu_bo*mqd_obj;
uint64_tmqd_gpu_addr;
void*mqd_ptr;
+   uint64_teop_gpu_addr;
u32 doorbell_index;
booluse_doorbell;
unsignedwptr_offs;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index bf608c8..a023f27 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1377,6 +1377,7 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device 
*adev,
  struct amdgpu_ring *ring,
  struct amdgpu_irq_src *irq)
 {
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
int r = 0;
 
r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
@@ -1396,6 +1397,7 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device 
*adev,
}
 
ring->queue = 0;
+   ring->eop_gpu_addr = kiq->eop_gpu_addr;
sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024,
 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
@@ -2153,6 +2155,7 @@ static int gfx_v8_0_sw_init(void *handle)
ring->me = 1; /* first MEC */
ring->pipe = i / 8;
ring->queue = i % 8;
+   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * 
MEC_HPD_SIZE);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
@@ -4665,8 +4668,7 @@ static void gfx_v8_0_map_queue_enable(struct amdgpu_ring 
*kiq_ring,
 }
 
 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
-struct vi_mqd *mqd,
-uint64_t eop_gpu_addr)
+struct vi_mqd *mqd)
 {
struct amdgpu_device *adev = ring->adev;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
@@ -4680,7 +4682,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
mqd->compute_static_thread_mgmt_se3 = 0x;
mqd->compute_misc_reserved = 0x0003;
 
-   eop_base_addr = eop_gpu_addr >> 8;
+   eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
@@ -4879,16 +4881,12 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-   uint64_t eop_gpu_addr;
bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
if (is_kiq) {
-   eop_gpu_addr = kiq->eop_gpu_addr;
gfx_v8_0_kiq_setting(&kiq->ring);
} else {
-   eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
-   ring->queue * MEC_HPD_SIZE;
mqd_idx = ring - &adev->gfx.compute_ring[0];
}
 
@@ -4896,7 +4894,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v8_0_mqd_init(ring, mqd, eop_gpu_addr);
+   gfx_v8_0_mqd_init(ring, mqd);
if (is_kiq)
gfx_v8_0_kiq_init_register(ring, mqd);
vi_srbm_select(adev, 0, 0, 0, 0);
-- 
2.5.5



[PATCH 1/8] drm/amdgpu/gfx8: whitespace cleanup

2017-03-23 Thread Alex Deucher
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0ff776e..cc4945f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7085,8 +7085,8 @@ static int gfx_v8_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
ring = &adev->gfx.kiq.ring;
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), 
PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob 
(%d)", r);
return r;
@@ -7099,13 +7099,12 @@ static int gfx_v8_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
}
 
/* create MQD for each KCQ */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++)
-   {
+   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, sizeof(struct 
vi_mqd), PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
-   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
+   AMDGPU_GEM_DOMAIN_GTT, 
&ring->mqd_obj,
+   &ring->mqd_gpu_addr, 
&ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd 
ob (%d)", r);
return r;
-- 
2.5.5



[PATCH 5/8] drm/amdgpu/gfx8: fold loops in kiq_resume()

2017-03-23 Thread Alex Deucher
No need to loop through the compute queues twice.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ecfbbd1..450f7ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4977,10 +4977,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device 
*adev)
amdgpu_bo_unreserve(ring->mqd_obj);
if (r)
goto done;
-   }
-
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   ring = &adev->gfx.compute_ring[i];
 
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
-- 
2.5.5



[PATCH 7/8] drm/amdgpu/gfx8: reduce the function params for mqd setup

2017-03-23 Thread Alex Deucher
Everything we need is in the ring structure.  No need to
pass all the bits explicitly.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 32 ++--
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 3f710b8..bf608c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4664,12 +4664,11 @@ static void gfx_v8_0_map_queue_enable(struct 
amdgpu_ring *kiq_ring,
udelay(50);
 }
 
-static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
+static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
 struct vi_mqd *mqd,
-uint64_t mqd_gpu_addr,
-uint64_t eop_gpu_addr,
-struct amdgpu_ring *ring)
+uint64_t eop_gpu_addr)
 {
+   struct amdgpu_device *adev = ring->adev;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
 
@@ -4710,8 +4709,8 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_pq_wptr = 0;
 
/* set the pointer to the MQD */
-   mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffc;
-   mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+   mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffc;
+   mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
/* set MQD vmid to 0 */
tmp = RREG32(mmCP_MQD_CONTROL);
@@ -4784,10 +4783,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
return 0;
 }
 
-static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
- struct vi_mqd *mqd,
- struct amdgpu_ring *ring)
+static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring,
+ struct vi_mqd *mqd)
 {
+   struct amdgpu_device *adev = ring->adev;
uint32_t tmp;
int j;
 
@@ -4876,8 +4875,7 @@ static int gfx_v8_0_kiq_init_register(struct 
amdgpu_device *adev,
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
-  struct vi_mqd *mqd,
-  u64 mqd_gpu_addr)
+  struct vi_mqd *mqd)
 {
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -4898,9 +4896,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
+   gfx_v8_0_mqd_init(ring, mqd, eop_gpu_addr);
if (is_kiq)
-   gfx_v8_0_kiq_init_register(adev, mqd, ring);
+   gfx_v8_0_kiq_init_register(ring, mqd);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
@@ -4918,7 +4916,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring 
*ring,
if (is_kiq) {
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-   gfx_v8_0_kiq_init_register(adev, mqd, ring);
+   gfx_v8_0_kiq_init_register(ring, mqd);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -4948,8 +4946,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
if (!r) {
r = gfx_v8_0_kiq_init_queue(ring,
-   (struct vi_mqd *)ring->mqd_ptr,
-   ring->mqd_gpu_addr);
+   (struct vi_mqd *)ring->mqd_ptr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
@@ -4973,8 +4970,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
if (!r) {
r = gfx_v8_0_kiq_init_queue(ring,
-   (struct vi_mqd 
*)ring->mqd_ptr,
-   ring->mqd_gpu_addr);
+   (struct vi_mqd 
*)ring->mqd_ptr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
}
-- 
2.5.5



[PATCH 6/8] drm/amdgpu: reserve kiq eop object before unmapping it

2017-03-23 Thread Alex Deucher
It's required.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 450f7ec..3f710b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1488,7 +1488,11 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 
memset(hpd, 0, MEC_HPD_SIZE);
 
+   r = amdgpu_bo_reserve(kiq->eop_obj, false);
+   if (unlikely(r != 0))
+   dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
amdgpu_bo_kunmap(kiq->eop_obj);
+   amdgpu_bo_unreserve(kiq->eop_obj);
 
return 0;
 }
-- 
2.5.5



[PATCH 3/8] drm/amdgpu/gfx8: reserve mqd objects before mapping them

2017-03-23 Thread Alex Deucher
It's required.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8e30cb0..efe03a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4936,31 +4936,40 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device 
*adev)
gfx_v8_0_cp_compute_enable(adev, true);
 
ring = &adev->gfx.kiq.ring;
-   if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
+
+   r = amdgpu_bo_reserve(ring->mqd_obj, false);
+   if (unlikely(r != 0))
+   goto done;
+
+   r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+   if (!r) {
r = gfx_v8_0_kiq_init_queue(ring,
(struct vi_mqd *)ring->mqd_ptr,
ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
-   if (r)
-   return r;
-   } else {
-   return r;
}
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   if (r)
+   goto done;
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
-   if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
+
+   r = amdgpu_bo_reserve(ring->mqd_obj, false);
+   if (unlikely(r != 0))
+   goto done;
+   r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+   if (!r) {
r = gfx_v8_0_kiq_init_queue(ring,
(struct vi_mqd 
*)ring->mqd_ptr,
ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
-   if (r)
-   return r;
-   } else {
-   return r;
}
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   if (r)
+   goto done;
}
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
@@ -4978,7 +4987,8 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
if (r)
ring->ready = false;
 
-   return 0;
+done:
+   return r;
 }
 
 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
-- 
2.5.5



[PATCH 2/8] drm/amdgpu/gfx8: rename some functions

2017-03-23 Thread Alex Deucher
To better match where they are used.  Called from sw_init
and sw_fini.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cc4945f..8e30cb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -659,8 +659,8 @@ static u32 gfx_v8_0_get_csb_size(struct amdgpu_device 
*adev);
 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t 
addr);
 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t 
addr);
-static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev);
-static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev);
+static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
+static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
 
 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -2170,7 +2170,7 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
 
/* create MQD for all compute queues as well as KIQ for SRIOV case */
-   r = gfx_v8_0_compute_mqd_soft_init(adev);
+   r = gfx_v8_0_compute_mqd_sw_init(adev);
if (r)
return r;
 
@@ -2216,7 +2216,7 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-   gfx_v8_0_compute_mqd_soft_fini(adev);
+   gfx_v8_0_compute_mqd_sw_fini(adev);
gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
gfx_v8_0_kiq_fini(adev);
 
@@ -7076,7 +7076,7 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct 
amdgpu_ring *ring, uint64_t c
 }
 
 /* create MQD for each compute queue */
-static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev)
+static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int r, i;
@@ -7120,7 +7120,7 @@ static int gfx_v8_0_compute_mqd_soft_init(struct 
amdgpu_device *adev)
return 0;
 }
 
-static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev)
+static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int i;
-- 
2.5.5



Re: [PATCH] Revert "drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first"

2017-03-23 Thread Zachary Michaels
>
> Was userspace maybe performing concurrent CPU access to the BOs in
> question?


As far as I know Julien has demonstrated that this is not the case.


> I hope we can find a better solution.


Understood -- I thought you might not want to take this patch, but I went
ahead and sent it out because Christian requested it, and it seems like he
doesn't think VRAM bos should ever evict back to VRAM at all?

Is my understanding of the original commit correct in that it tries to
rewrite the eviction placements of CPU accessible bos so that they are
either size zero (fpfn and lpfn = start of inaccessible VRAM) or they are
in inaccessible VRAM (fpfn = start of inaccessible VRAM and lpfn = 0)?

In this case, to me it seems that the simplest fix would be to iterate
using i to rewrite all the VRAM placements instead of just the first one
(rbo->placements[i] instead of rbo->placements[0]). In the case where
RADEON_GEM_NO_CPU_ACCESS
is set, the second placement will be in CPU accessible VRAM, and that
doesn't seem correct to me as there is no longer any sort of ordering for
evictions. (Unfortunately I'm not currently in a position to test whether
this fixes our issue.) Sorry, I meant to make a note of this originally.
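
For illustration, a sketch of the loop suggested above as it might look inside
radeon_evict_flags() (untested; the surrounding context of the original commit is
assumed):

	unsigned fpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	unsigned i;

	/* push every VRAM placement above the CPU-visible boundary,
	 * not just placements[0], so the eviction ordering stays intact */
	for (i = 0; i < rbo->placement.num_placement; i++) {
		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    rbo->placements[i].fpfn < fpfn)
			rbo->placements[i].fpfn = fpfn;
	}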

Also, I don't claim to understand this code well enough, but I wonder: if
these sorts of evictions are desirable, would it make more sense to treat
CPU inaccessible/accessible VRAM as distinct entities with their own lrus?

I should also note that we are experiencing another issue where the kernel
locks up in similar circumstances. As Julien noted, we get no output, and
the watchdogs don't seem to work. It may be the case that Xorg and our
process are calling ttm_bo_mem_force_space concurrently, but I don't think
we have enough information yet to say for sure. Reverting this commit does
not fix that issue. I have some small amount of evidence indicating that
bos flagged for CPU access are getting placed in CPU inaccessible memory.
Could that cause this sort of kernel lockup?

Thanks for your help.


[PATCH libdrm] amdgpu_drm: add AMDGPU_HW_IP_UVD_ENC

2017-03-23 Thread Leo Liu
Signed-off-by: Leo Liu 
Reviewed-by: Alex Deucher 
---
 include/drm/amdgpu_drm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index d702a95..96cccd1 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -388,7 +388,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_DMA  2
 #define AMDGPU_HW_IP_UVD  3
 #define AMDGPU_HW_IP_VCE  4
-#define AMDGPU_HW_IP_NUM  5
+#define AMDGPU_HW_IP_UVD_ENC  5
+#define AMDGPU_HW_IP_NUM  6
 
 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
 
-- 
2.7.4
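
For context (not part of the patch), user space would select the new engine through
the existing CS chunk interface, roughly like this; the va/size values are
placeholders for a real command buffer:

	/* sketch: an IB chunk targeting the UVD encode engine */
	struct drm_amdgpu_cs_chunk_ib ib = {
		.ip_type     = AMDGPU_HW_IP_UVD_ENC,	/* new in this patch */
		.ip_instance = 0,
		.ring        = 0,
		.va_start    = ib_gpu_va,	/* assumed GPU VA of the IB */
		.ib_bytes    = ib_size,		/* assumed IB size in bytes */
	};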



Re: Question about page table updates at BO destroy

2017-03-23 Thread Nicolai Hähnle

Hi Jerry,

On 23.03.2017 03:26, Zhang, Jerry (Junwei) wrote:

On 03/22/2017 11:06 PM, Nicolai Hähnle wrote:

Hi all,

there's a bit of a puzzle where I'm wondering whether there's a subtle
bug in
the amdgpu kernel module.

Basically, the concern is that a buggy user space driver might trigger a
sequence like this:

1. Submit a CS that accesses some BO _without_ adding that BO to the
buffer list.
2. Free that BO.


As I understand it, user space should call unmap when freeing a BO.
In this case, it will call amdgpu_gem_va_update_vm() to clear the PTE
related to the BO.
Right?

Or are you imagining a scenario where there is no unmap?


I'm thinking of the scenario without an unmap, i.e. broken / malicious 
user space. I haven't looked into the unmap case yet, but I will. I have a WIP 
patch for this and will give it a proper test drive later.
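
For illustration only (this is not the WIP patch; the fence field name below is a
placeholder, and the ticket/list variable names are assumed from the surrounding
function), the kind of change being discussed would fence the BO against the VM's
latest page-table update before backing off the reservation:

	/* in amdgpu_gem_object_close(), after removing the mapping:
	 * keep the BO busy until the page-table update that clears its
	 * PTEs has actually executed on the GPU */
	if (vm->last_pt_update_fence)	/* placeholder name for that fence */
		amdgpu_bo_fence(bo, vm->last_pt_update_fence, true);

	ttm_eu_backoff_reservation(&ticket, &list);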


Cheers,
Nicolai




Jerry


3. Some other task re-uses the memory underlying the BO.
4. The CS is submitted to the hardware and accesses memory that is now
already
in use by somebody else, since there has been no update to the page
tables to
reflect the freed BO.

Obviously there's a user space bug in step 1, but the kernel must
still prevent
the conflicting memory accesses, and I don't see where it does.

amdgpu_gem_object_close takes a reservation of the BO and the page
directory,
but then simply backs off that reservation rather than adding a fence,
which I
suspect is necessary.

I believe that whenever we remove a BO from a VM, we must
unconditionally add
the most recent page directory fence(?) to the BO. Does that sound right?

Cheers,
Nicolai




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


Re: [PATCH] Revert "drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first"

2017-03-23 Thread Julien Isorce
Hi Michel,

When it happens, the main thread of our GL-based app is stuck in an
ioctl(RADEON_CS). I set RADEON_THREAD=false to ease the debugging, but the
same thing happens if it is true. The other threads are only si_shader:0,1,2,3
and are doing nothing, just waiting for jobs. I can also do sudo gdb -p $(pidof
Xorg) to block the X11 server, to make sure there is no ping-pong between two
processes. All other processes are not loading dri/radeonsi_dri.so. And adding
a few traces shows that the above ioctl call is looping forever on
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/ttm/ttm_bo.c#L819
and comes from Mesa:
https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/radeon/drm/radeon_drm_cs.c#n454

After adding even more traces I can see that the bo, which is being
indefinitely evicted, has the flag RADEON_GEM_NO_CPU_ACCESS.
And it gets 3 potential placements after calling "radeon_evict_flags".
 1: VRAM cpu inaccessible, fpfn is 65536
 2: VRAM cpu accessible, fpfn is 0
 3: GTT, fpfn is 0

And it looks like it continuously succeeds in moving to the second
placement. So I might be wrong, but it looks like it is not even a
ping-pong between CPU-accessible and CPU-inaccessible VRAM; it just keeps
being blitted within the CPU-accessible part of the VRAM.

Maybe radeon_evict_flags should just not add the second placement if the
BO's current placement is already CPU-accessible VRAM. Or it could be a
bug in get_node, which should not succeed in that case.

Note that this happens when the VRAM is nearly full.
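
For illustration, that first idea might look roughly like the sketch below,
which bumps fpfn of any CPU-visible VRAM placement so that only
CPU-inaccessible VRAM and GTT remain as candidates. This is not actual
radeon_ttm.c code; the struct fields follow the usual radeon/TTM names but
are assumptions here:

/* Sketch: when evicting a BO that already lives in VRAM, drop the
 * CPU-visible VRAM range from its eviction placements.
 */
static void radeon_skip_visible_vram_placement(struct radeon_bo *rbo)
{
	unsigned visible_pages =
		rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
	unsigned i;

	for (i = 0; i < rbo->placement.num_placement; i++) {
		struct ttm_place *place = &rbo->placements[i];

		/* A VRAM placement whose fpfn starts below the visible
		 * limit still allows the CPU-accessible range.
		 */
		if ((place->flags & TTM_PL_FLAG_VRAM) &&
		    place->fpfn < visible_pages)
			place->fpfn = visible_pages;
	}
}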

FWIW I noticed that amdgpu is doing something different:
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c#L205
vs
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/radeon/radeon_ttm.c#L198


Finally, the NMI watchdog and the kernel soft lockup and hard lockup
detectors do not detect this looping in that ioctl(RADEON_CS), maybe
because they consider it to be doing real work. The same goes for
radeon_lockup_timeout; it does not detect it either.

The GPU is a FirePro W600 (Cape Verde, 2048M).

Thx
Julien

On Thu, Mar 23, 2017 at 8:10 AM, Michel Dänzer  wrote:

> On 23/03/17 03:19 AM, Zachary Michaels wrote:
> > We were experiencing an infinite loop due to VRAM bos getting added back
> > to the VRAM lru on eviction via ttm_bo_mem_force_space,
>
> Can you share more details about what happened? I can imagine that
> moving a BO from CPU visible to CPU invisible VRAM would put it back on
> the LRU, but next time around it shouldn't hit this code anymore but get
> evicted to GTT directly.
>
> Was userspace maybe performing concurrent CPU access to the BOs in
> question?
>
>
> > and reverting this commit solves the problem.
>
> I hope we can find a better solution.
>
>
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH xf86-video-amdgpu] Don't set modes before AMDGPUWindowExposures_oneshot is called

2017-03-23 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Michel Dänzer
> Sent: Thursday, March 23, 2017 5:54 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH xf86-video-amdgpu] Don't set modes before
> AMDGPUWindowExposures_oneshot is called
> 
> From: Michel Dänzer 
> 
> The root window contents may be undefined before that, so we don't want
> to show anything yet.
> 
> Fixes a crash on startup with rotation and virtual resolution set in
> xorg.conf.
> 
> Bugzilla: https://bugs.freedesktop.org/100276
> Signed-off-by: Michel Dänzer 

Reviewed-by: Alex Deucher 

> ---
>  src/amdgpu_drv.h  | 5 +
>  src/amdgpu_kms.c  | 6 +++---
>  src/drmmode_display.c | 6 ++
>  3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
> index 532d99daa..e5c44dc36 100644
> --- a/src/amdgpu_drv.h
> +++ b/src/amdgpu_drv.h
> @@ -315,6 +315,11 @@ Bool amdgpu_dri3_screen_init(ScreenPtr screen);
> 
>  /* amdgpu_kms.c */
>  Bool amdgpu_scanout_do_update(xf86CrtcPtr xf86_crtc, int scanout_id);
> +void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr
> pRegion
> +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
> +, RegionPtr pBSRegion
> +#endif
> +);
> 
>  /* amdgpu_present.c */
>  Bool amdgpu_present_screen_init(ScreenPtr screen);
> diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
> index 4821e932f..90d0288af 100644
> --- a/src/amdgpu_kms.c
> +++ b/src/amdgpu_kms.c
> @@ -1258,11 +1258,11 @@ static Bool
> AMDGPUCreateWindow_oneshot(WindowPtr pWin)
>  }
> 
>  /* When the root window is mapped, set the initial modes */
> -static void AMDGPUWindowExposures_oneshot(WindowPtr pWin,
> RegionPtr pRegion
> +void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr
> pRegion
>  #if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
> -   , RegionPtr pBSRegion
> +, RegionPtr pBSRegion
>  #endif
> - )
> +)
>  {
>   ScreenPtr pScreen = pWin->drawable.pScreen;
>   ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index 2cdea90ad..a041e8b67 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -779,6 +779,12 @@ drmmode_set_mode_major(xf86CrtcPtr crtc,
> DisplayModePtr mode,
>   drmModeModeInfo kmode;
>   uint32_t bo_handle;
> 
> + /* The root window contents may be undefined before the
> WindowExposures
> +  * hook is called for it, so bail if we get here before that
> +  */
> + if (pScreen->WindowExposures ==
> AMDGPUWindowExposures_oneshot)
> + return FALSE;
> +
>   saved_mode = crtc->mode;
>   saved_x = crtc->x;
>   saved_y = crtc->y;
> --
> 2.11.0
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: guarantee bijective mapping of ring ids for LRU

2017-03-23 Thread Nicolai Hähnle

On 17.03.2017 19:52, Andres Rodriguez wrote:

Depending on usage patterns, the current LRU policy may create a
non-injective mapping between userspace ring ids and kernel rings.

This behaviour is undesirable, as apps that attempt to fill all HW blocks
would be unable to reach some of them.

This change forces the LRU policy to create bijective mappings only.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 15 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 33 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  4 ++--
 3 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 054d750..2cffb0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -108,24 +108,35 @@ static enum amdgpu_ring_type 
amdgpu_hw_ip_to_ring_type(int hw_ip)
DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
return -1;
}
 }

 static int amdgpu_lru_map(struct amdgpu_device *adev,
  struct amdgpu_queue_mapper *mapper,
  int user_ring,
  struct amdgpu_ring **out_ring)
 {
-   int r;
+   int r, i;
int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+   int ring_blacklist[AMDGPU_MAX_RINGS];
+   struct amdgpu_ring *ring;
+
+   for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+   ring = mapper->queue_map[i];
+   if (!ring)
+   ring_blacklist[i] = -1;
+   else
+   ring_blacklist[i] = ring->idx;
+   }


Given how ring_blacklist is used, I'd suggest "compressing" its entries 
instead of introducing -1 gaps.
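
For example, a compressed version might look like this (sketch only,
untested, based on the loop in the patch above):

	int num_blacklist = 0;

	/* Record only the rings that are actually mapped; no -1 placeholders. */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		ring = mapper->queue_map[i];
		if (ring)
			ring_blacklist[num_blacklist++] = ring->idx;
	}

	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
				num_blacklist, out_ring);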


The rest of the patch looks good to me.

Cheers,
Nicolai




-   r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
+   r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
+   AMDGPU_MAX_RINGS, out_ring);
if (r)
return r;

return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
 }

 /**
  * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index ca41b3a..0db07b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -393,46 +393,65 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
ring->adev->rings[ring->idx] = NULL;
 }

 static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
 struct amdgpu_ring *ring)
 {
/* list_move_tail handles the case where ring isn't part of the list */
list_move_tail(&ring->lru_list, &adev->ring_lru_list);
 }

+static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
+  int *blacklist, int num_blacklist)
+{
+   int i;
+
+   for (i = 0; i < num_blacklist; i++) {
+   if (ring->idx == blacklist[i])
+   return true;
+   }
+
+   return false;
+}
+
 /**
  * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
  *
  * @adev: amdgpu_device pointer
  * @type: amdgpu_ring_type enum
+ * @blacklist: blacklisted ring ids array
+ * @num_blacklist: number of entries in @blacklist
  * @ring: output ring
  *
  * Retrieve the amdgpu_ring structure for the least recently used ring of
  * a specific IP block (all asics).
  * Returns 0 on success, error on failure.
  */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
-   struct amdgpu_ring **ring)
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
+   int num_blacklist, struct amdgpu_ring **ring)
 {
struct amdgpu_ring *entry;

/* List is sorted in LRU order, find first entry corresponding
 * to the desired HW IP */
*ring = NULL;
spin_lock(&adev->ring_lru_list_lock);
list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
-   if (entry->funcs->type == type) {
-   *ring = entry;
-   amdgpu_ring_lru_touch_locked(adev, *ring);
-   break;
-   }
+   if (entry->funcs->type != type)
+   continue;
+
+   if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
+   continue;
+
+   *ring = entry;
+   amdgpu_ring_lru_touch_locked(adev, *ring);
+   break;
}
spin_unlock(&adev->ring_lru_list_lock);

if (!*ring) {
DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", 
type);
return -EINVAL;
}

return

Research Project

2017-03-23 Thread Giacomo Marzi
Hi! We are conducting an international project with the Stockholm School 
of Economics and the Politecnico of Milan to assess some relevant issues 
about software development.
We’d like you to answer the following survey, which is completely 
anonymous. It’ll take you only 10 minutes!

Link: https://goo.gl/7hCqHT

Giacomo Marzi

--
Giacomo Marzi
PhD Student in Management
Department of Sciences for Business & Economics - University of Florence

Google Scholar Profile: http://scholar.google.it/citations?user=L14JMrkJ
Research Gate Profile: http://www.researchgate.net/profile/Giacomo_Marzi
LinkedIn Profile: http://it.linkedin.com/in/giacomomarzi

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] Don't set modes before AMDGPUWindowExposures_oneshot is called

2017-03-23 Thread Michel Dänzer
From: Michel Dänzer 

The root window contents may be undefined before that, so we don't want
to show anything yet.

Fixes a crash on startup with rotation and virtual resolution set in
xorg.conf.

Bugzilla: https://bugs.freedesktop.org/100276
Signed-off-by: Michel Dänzer 
---
 src/amdgpu_drv.h  | 5 +
 src/amdgpu_kms.c  | 6 +++---
 src/drmmode_display.c | 6 ++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
index 532d99daa..e5c44dc36 100644
--- a/src/amdgpu_drv.h
+++ b/src/amdgpu_drv.h
@@ -315,6 +315,11 @@ Bool amdgpu_dri3_screen_init(ScreenPtr screen);
 
 /* amdgpu_kms.c */
 Bool amdgpu_scanout_do_update(xf86CrtcPtr xf86_crtc, int scanout_id);
+void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr pRegion
+#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
+  , RegionPtr pBSRegion
+#endif
+  );
 
 /* amdgpu_present.c */
 Bool amdgpu_present_screen_init(ScreenPtr screen);
diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index 4821e932f..90d0288af 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -1258,11 +1258,11 @@ static Bool AMDGPUCreateWindow_oneshot(WindowPtr pWin)
 }
 
 /* When the root window is mapped, set the initial modes */
-static void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr pRegion
+void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr pRegion
 #if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
- , RegionPtr pBSRegion
+  , RegionPtr pBSRegion
 #endif
-   )
+  )
 {
ScreenPtr pScreen = pWin->drawable.pScreen;
ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 2cdea90ad..a041e8b67 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -779,6 +779,12 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr 
mode,
drmModeModeInfo kmode;
uint32_t bo_handle;
 
+   /* The root window contents may be undefined before the WindowExposures
+* hook is called for it, so bail if we get here before that
+*/
+   if (pScreen->WindowExposures == AMDGPUWindowExposures_oneshot)
+   return FALSE;
+
saved_mode = crtc->mode;
saved_x = crtc->x;
saved_y = crtc->y;
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first"

2017-03-23 Thread Michel Dänzer
On 23/03/17 03:19 AM, Zachary Michaels wrote:
> We were experiencing an infinite loop due to VRAM bos getting added back
> to the VRAM lru on eviction via ttm_bo_mem_force_space,

Can you share more details about what happened? I can imagine that
moving a BO from CPU visible to CPU invisible VRAM would put it back on
the LRU, but next time around it shouldn't hit this code anymore but get
evicted to GTT directly.

Was userspace maybe performing concurrent CPU access to the BOs in question?


> and reverting this commit solves the problem.

I hope we can find a better solution.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm 2/7] amdgpu: update amdgpu_drm.h for Vega10

2017-03-23 Thread Michel Dänzer
On 22/03/17 07:13 PM, Marek Olšák wrote:
> On Mar 22, 2017 2:44 AM, "Michel Dänzer"  wrote:
>> On 22/03/17 06:46 AM, Marek Olšák wrote:
>>> On Tue, Mar 21, 2017 at 10:27 PM, Nicolai Hähnle  wrote:
 In the past, I was told off for patches that update this file
 without following the procedure described in
 include/drm/README. Tbh, that procedure causes some
 annoyances.
 
 Anyway, it's definitely useful to have the patch out on the 
 mailing list in any case.
>>> 
>>> Yeah, I know the correct process and I plan to ignore it this
>>> time if I don't get too much backlash, because the alternative 
>>> (#ifdef/#define/#endif) is probably even worse.
>> 
>> FWIW, only AMDGPU_TILING_SET/GET need #undef, 
>> AMDGPU_TILING_SWIZZLE_MODE_SHIFT/MASK and AMDGPU_FAMILY_AI can just
>> be #defined directly, that way the preprocessor will warn if the 
>> definitions in libdrm and Mesa end up being inconsistent for some 
>> reason.
>> 
>> 
>> The alternative is rushing out a libdrm release and making Mesa
>> require that, right? That doesn't seem obviously better than a
>> handful of temporary redundant defines in Mesa, hardly
>> justification for bypassing the normal process.
> 
> I need a libdrm release because of the 3rd patch. I can't allow Mesa
> to run without that.

Gotcha, thanks for the clarification.
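
For illustration, the stop-gap discussed above might look like this on the
Mesa side. This is only a sketch; the values and macro body below are
placeholders rather than the real amdgpu_drm.h definitions:

/* Temporary duplicates until a libdrm release with the new amdgpu_drm.h
 * can be required.  New symbols get plain #defines (no #ifndef guard),
 * so the preprocessor warns about a mismatching redefinition if libdrm
 * ever disagrees; only the pre-existing SET/GET helpers need an #undef.
 */
#define AMDGPU_FAMILY_AI			141	/* placeholder value */
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT	0	/* placeholder */
#define AMDGPU_TILING_SWIZZLE_MODE_MASK		0x1f	/* placeholder */

#undef AMDGPU_TILING_SET
#define AMDGPU_TILING_SET(field, value) \
	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << \
	 AMDGPU_TILING_##field##_SHIFT)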


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx